Example #1
# note: the *Feature classes, import_dynamically and BaseFeatureExtractor are
# assumed to come from the surrounding project (its backend/utils modules)
import os
import sys

def import_feature_extractor(backend, input_size):
    if backend == 'Inception3':
        feature_extractor = Inception3Feature(input_size)  
    elif backend == 'SqueezeNet':
        feature_extractor = SqueezeNetFeature(input_size)        
    elif backend == 'MobileNet':
        feature_extractor = MobileNetFeature(input_size)
    elif backend == 'Full Yolo':
        feature_extractor = FullYoloFeature(input_size)
    elif backend == 'Tiny Yolo':
        feature_extractor = TinyYoloFeature(input_size)
    elif backend == 'VGG16':
        feature_extractor = VGG16Feature(input_size)
    elif backend == 'ResNet50':
        feature_extractor = ResNet50Feature(input_size)
    elif os.path.dirname(backend) != "":
        base_path = os.path.dirname(backend)
        sys.path.append(base_path)
        custom_backend_name = os.path.basename(backend)
        custom_backend = import_dynamically(custom_backend_name)
        # validate the class before instantiating it, so a bad custom backend
        # fails with a clear error instead of a cryptic one
        if not issubclass(custom_backend, BaseFeatureExtractor):
            raise RuntimeError('You are trying to import a custom backend, but your backend must'
                               ' inherit from "backend.BaseFeatureExtractor".')
        feature_extractor = custom_backend(input_size)
        print('Using a custom backend called {}.'.format(custom_backend_name))
    else:
        raise RuntimeError('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, '
                           'SqueezeNet, VGG16, ResNet50, or Inception3 at the moment!')

    return feature_extractor
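The same entry point serves both built-in and custom backends. A minimal usage sketch (the input size and the custom path below are hypothetical):

feature_extractor = import_feature_extractor('MobileNet', 416)

# a custom backend is referenced by a path-like name; its directory is
# appended to sys.path and the module is imported dynamically:
# feature_extractor = import_feature_extractor('/opt/backends/my_backend', 416)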
Example #2
    def __init__(self, architecture, input_size, labels, max_box_per_image,
                 anchors):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = 5
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if architecture == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif architecture == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        else:
            raise Exception(
                'Architecture not supported! Please use Full Yolo or Tiny Yolo!'
            )

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='conv_23',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
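A hypothetical construction call for this variant (the enclosing class name YOLO and every value below are illustrative, not taken from the snippet):

yolo = YOLO(architecture='Full Yolo',
            input_size=416,
            labels=['person', 'car'],
            max_box_per_image=10,
            anchors=[0.57, 0.67, 1.87, 2.06, 3.34, 5.47, 7.88, 3.53, 9.77, 9.17])  # 5 (w, h) pairs, matching the hardcoded nb_box = 5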
Example #3
    def __init__(self, backend, input_size, labels, max_box_per_image,
                 anchors):
        """

        :param backend: 特征提取器
        :param input_size: 输入图像的维度
        :param labels: 标签
        :param max_box_per_image: 每张图像最多所拥有的框数量
        :param anchors: 锚框
        """

        self.input_size = input_size
        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype=np.float32)
        self.anchors = anchors
        self.max_box_per_image = max_box_per_image

        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        self.feature_extractor = FullYoloFeature(self.input_size)
        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
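The detection head's filter count follows directly from the anchors list, which stores flattened (w, h) pairs. A quick arithmetic check with hypothetical values (5 anchors, 20 classes, a 13x13 grid):

anchors = [0.57, 0.67, 1.87, 2.06, 3.34, 5.47, 7.88, 3.53, 9.77, 9.17]
nb_box = len(anchors) // 2               # 5 anchor boxes per grid cell
nb_class = 20
filters = nb_box * (4 + 1 + nb_class)    # 5 * 25 = 125 output channels
# the Conv2D output (13, 13, 125) is reshaped to (13, 13, 5, 25):
# per box, 4 coordinates + 1 objectness score + 20 class scores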
Example #4
    def __init__(self, backend,
                 input_size,
                 labels,
                 max_box_per_image,
                 anchors):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception(
                'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1, 1), strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
Example #5
    def __init__(self, backend, input_size, labels, max_box_per_image, anchors, verbose=1):
        ##########################
        # Save the network parameters
        ##########################
        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################
        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(input_size=self.input_size)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(input_size=self.input_size)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(input_size=self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(input_size=self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(input_size=self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(input_size=self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(input_size=self.input_size)
        else:
            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        print('Feature extractor shape: {}'.format(self.feature_extractor.get_output_shape()))
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image=input_image)

        # make the object detection layer
        output = Conv2D(filters=self.nb_box * (4 + 1 + self.nb_class),
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        padding='same',
                        kernel_initializer='lecun_normal',
                        name='DetectionLayer')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)

        # small hack to allow true_boxes to be registered when Keras builds the model
        # for more information: https://github.com/fchollet/keras/issues/2790
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # ??? Why do the weights of the DetectionLayer have to be re-initialized as below?
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)
        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        if verbose:
            self.model.summary()
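The Lambda layer forwards only its first argument; the point of the hack is to pull true_boxes into the graph so that Keras registers it as a model input (see the issue linked above). A stripped-down sketch of the same trick, using tf.keras imports and made-up shapes:

from tensorflow.keras.layers import Dense, Input, Lambda
from tensorflow.keras.models import Model

x = Input(shape=(8,))
aux = Input(shape=(4,))                      # only consumed by a custom loss
y = Dense(2)(x)
y = Lambda(lambda args: args[0])([y, aux])   # forwards y, but ties aux into the graph
model = Model([x, aux], y)                   # aux is now a registered input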
Example #6
    def __init__(self, backend, input_width, input_height, input_channel,
                 labels, max_box_per_image, anchors, saved_config_name):
        self.input_width = input_width
        self.input_height = input_height
        self.input_channel = input_channel

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = len(anchors) // 2  # each anchor is a (w, h) pair
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors
        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################
        # models.model_1(self.input_height, self.input_width, self.input_channel, \
        #                self.max_box_per_image, self.nb_box, self.nb_class)
        # make the feature extractor layers
        input_image = Input(shape=(self.input_height, self.input_width,
                                   self.input_channel))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_height,
                                                      self.input_width,
                                                      self.input_channel)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_height,
                                                     self.input_width,
                                                     self.input_channel)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_height,
                                                     self.input_width,
                                                     self.input_channel)
        elif backend == 'Tiny Yolo_1':
            self.feature_extractor = TinyYoloFeature_1(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_2':
            self.feature_extractor = TinyYoloFeature_2(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_3':
            self.feature_extractor = TinyYoloFeature_3(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_4':
            self.feature_extractor = TinyYoloFeature_4(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'Tiny Yolo_5':
            self.feature_extractor = TinyYoloFeature_5(self.input_height,
                                                       self.input_width,
                                                       self.input_channel)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_height,
                                                  self.input_width)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_height,
                                                     self.input_width)
        elif backend == 'My Yolo':
            self.feature_extractor = MyYoloFeature(self.input_height,
                                                   self.input_width)
        else:
            raise Exception(
                'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!'
            )

        # print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

        #features = self.feature_extractor.extract(input_image)
        features = self.feature_extractor.feature_extractor.output

        # make the object detection layer
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='DetectionLayer',
                        kernel_initializer='lecun_normal')(features)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        #self.model = Model([input_image, self.true_boxes], output)
        self.model = Model(
            [self.feature_extractor.feature_extractor.input, self.true_boxes],
            output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # save model config
        model_json = self.model.to_json()
        with open(str(saved_config_name), "w") as json_file:
            json_file.write(model_json)

        # print a summary of the whole model
        self.feature_extractor.feature_extractor.summary()
        self.model.summary()
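This variant is the only one that persists the architecture to disk. The JSON written above can be restored with Keras' model_from_json; a sketch, where the file names are hypothetical and any custom layers would have to be supplied via custom_objects:

from tensorflow.keras.models import model_from_json

with open('yolo_config.json') as json_file:    # hypothetical saved_config_name
    model = model_from_json(json_file.read())  # restores the architecture only
# model.load_weights('yolo_weights.h5')        # weights are saved and loaded separately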
Example #7
    def __init__(self, architecture, input_size, labels, max_box_per_image,
                 anchors):

        self.input_size = input_size

        self.labels = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box = 5
        self.class_wt = np.ones(self.nb_class, dtype='float32')
        self.anchors = anchors

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        input_image = Input(shape=(self.input_size, self.input_size, 3))
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

        if architecture == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)

        else:
            raise Exception(
                'Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!'
            )

        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
        features = self.feature_extractor.extract(input_image)

        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class), (1, 1),
                        strides=(1, 1),
                        padding='same',
                        name='conv_23',
                        kernel_initializer='lecun_normal')(features)
        # output = Conv2D(1024,
        #                 (3,3), strides=(1,1),
        #                 padding='same',
        #                 name='conv_23',
        #                 kernel_initializer='lecun_normal',
        #                 use_bias=False)(features)

        # output = Flatten()(output)
        # output = Dense(512)(output)
        # # output = Dense(16384)(output)
        # output = LeakyReLU(alpha=0.1)(output)
        # output = Dense(self.grid_h*self.grid_w*self.nb_box*( 4 + 1 + self.nb_class))(output)

        # make the object detection layer

        output = Reshape((self.grid_h, self.grid_w, self.nb_box,
                          4 + 1 + self.nb_class))(output)
        output = Lambda(lambda args: args[0])([output, self.true_boxes])

        self.model = Model([input_image, self.true_boxes], output)

        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()

        new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h *
                                                                self.grid_w)
        new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h *
                                                              self.grid_w)

        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
Example #8
    def __init__(self, backend,
                       input_size, 
                       labels, 
                       max_box_per_image,
                       anchors,
                       threshold,
                       max_sur):

        self.input_size = input_size
        
        self.labels   = list(labels)
        self.nb_class = len(self.labels)
        self.nb_box   = len(anchors)//2 # presumably the expected number of boxes? haven't read the paper closely yet
        self.class_wt = np.ones(self.nb_class, dtype='float32') # not extended here; the weight of each class could be customized
        self.anchors  = anchors
        self.threshold = threshold
        self.max_sur = max_sur

        self.max_box_per_image = max_box_per_image

        ##########################
        # Make the model
        ##########################

        # make the feature extractor layers
        # build the input layer for the image
        input_image     = Input(shape=(self.input_size, self.input_size, 3))

        # build the input layer for bounding-box regression
        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))  

        # get the convolutional part of the network from backend; this returns one of the classes defined there, all subclasses of BaseFeatureExtractor
        # each backend subclass's self.feature_extractor attribute is the actual Keras Model object; backend merely wraps it
        # the YOLO class's attribute is also called feature_extractor, but it holds one of those backend subclass objects - don't confuse the two
        if backend == 'Inception3':
            self.feature_extractor = Inception3Feature(self.input_size)  
        elif backend == 'SqueezeNet':
            self.feature_extractor = SqueezeNetFeature(self.input_size)        
        elif backend == 'MobileNet':
            self.feature_extractor = MobileNetFeature(self.input_size)
        elif backend == 'Full Yolo':
            self.feature_extractor = FullYoloFeature(self.input_size)
        elif backend == 'Tiny Yolo':
            self.feature_extractor = TinyYoloFeature(self.input_size)
        elif backend == 'VGG16':
            self.feature_extractor = VGG16Feature(self.input_size)
        elif backend == 'ResNet50':
            self.feature_extractor = ResNet50Feature(self.input_size)
        else:
            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')

        # get the size of the output feature map via get_output_shape(), defined in the parent class
        print(issubclass(Model, Layer))
        print(self.feature_extractor.get_output_shape())
        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()

        # like get_output_shape(), extract() is a method defined on backend's BaseFeatureExtractor parent class
        # here it connects the image input layer defined above to the model's feature-extraction module
        # judging from the backend code this step is a bit redundant, since backend already has an input layer; perhaps it's for convenience? I think it could be removed
        # in any case, features here is a half-built model (its feature-extraction part); calling predict on it directly would yield a feature map
        features = self.feature_extractor.extract(input_image)            

        # make the object detection layer
        # build the model's classification/detection head
        # its output shape is:
        # (self.grid_h, self.grid_w, self.nb_box * (4 + 1 + self.nb_class))
        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
                        (1,1), strides=(1,1), 
                        padding='same', 
                        name='DetectionLayer', 
                        kernel_initializer='lecun_normal')(features)
        # 13*13 grid cells, each with a prediction for every bounding box (len(anchors)//2 of them)
        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)

        # the added Lambda layer looks really odd: true_boxes is passed in but never used, as if it had never been passed at all; its purpose is not obvious
        # note that it implicitly adds an input layer for the true_boxes input
        output = Lambda(lambda args: args[0])([output, self.true_boxes])
        self.model = Model([input_image, self.true_boxes], output)

        """
        这里要注意的是:
        现在的model是一个:
        input->BACKEND->Covn2d->Reshape->(input)->Lambda的模型
        一共是6层
        虽然backend里面的构造很复杂,但是在这里被当成一个layer(因为其实MODEL对象是layer对象的子类)
        """

        # output shape: (self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class)
        print(self.model.layers)
        print(f"number of layers:{len(self.model.layers)}")
        print(self.model.output_shape)

        
        # initialize the weights of the detection layer
        layer = self.model.layers[-4]
        weights = layer.get_weights()
        print(f"weigth_2D:{weights}")
        # 第一个array的shape是(w,h,d(上一层传下来有多少个feature-map),number)
        # 第二个array的shape是(number)也就是说每个kernel一个bias
        print(f"weigth_2D_shape:{(weights[0].shape,weights[1].shape)}")
        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)
        # the detection layer is initialized with Gaussian noise
        layer.set_weights([new_kernel, new_bias])

        # print a summary of the whole model
        self.model.summary()
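Every example above locates the detection layer by position: the graph ends Conv2D -> Reshape -> true_boxes Input -> Lambda, so model.layers[-4] is the Conv2D. Looking the layer up by name is less brittle if the tail of the graph ever changes; a sketch that mirrors the re-initialization above, assuming the layer is named 'DetectionLayer' as in these examples:

layer = self.model.get_layer('DetectionLayer')  # equivalent to self.model.layers[-4] here
weights = layer.get_weights()
new_kernel = np.random.normal(size=weights[0].shape) / (self.grid_h * self.grid_w)
new_bias = np.random.normal(size=weights[1].shape) / (self.grid_h * self.grid_w)
layer.set_weights([new_kernel, new_bias])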