def RfbNet(include_top=True, pretrained=True, input_shape=None, base_filters=16, num_classes=1, num_regressors=4,
           **kwargs):
    """Create an RFB detection model wrapped in an ``SsdDetectionModel``.

    Args:
        include_top: accepted for signature parity; not referenced by this function.
        pretrained: when equal to ``True`` the 'version-RFB-640.pth' weights are downloaded,
            loaded and installed as the wrapped model.
        input_shape: a 3-element shape is converted to a tuple, anything else falls back
            to ``(3, 480, 640)``.
        base_filters: forwarded to ``RFBnet``.
        num_classes: forwarded to ``RFBnet``.
        num_regressors: forwarded to ``RFBnet``.
        **kwargs: accepted and ignored.

    Returns:
        The configured ``SsdDetectionModel`` instance.
    """
    shape_is_usable = input_shape is not None and len(input_shape) == 3
    input_shape = tuple(input_shape) if shape_is_usable else (3, 480, 640)
    # NOTE(review): the normalized ``input_shape`` above is never consumed; the wrapper and the
    # resize step below are always set up for 480x640. Confirm whether that is intentional.

    detector = RFBnet(base_filters=base_filters, num_classes=num_classes, num_regressors=num_regressors)
    rfbnet = SsdDetectionModel(input_shape=(3, 480, 640), output=detector)

    rfbnet.detection_threshold = 0.7
    rfbnet.nms_threshold = 0.7
    for palette_index in (0, 1):
        rfbnet.palette[palette_index] = (128, 255, 128)
    rfbnet.preprocess_flow = [Resize((480, 640), True), Normalize(127.5, 127.5)]

    if pretrained == True:
        weights_name = 'version-RFB-640.pth'
        download_model_from_google_drive('1T_0VYOHaxoyuG1fAxY-6g0C7pfXiujns', dirname, weights_name)
        recovery_model = fix_layer(load(os.path.join(dirname, weights_name)))

        # Re-register ``priors`` as a buffer: clone it, drop the attribute, then add it back.
        priors_copy = recovery_model.priors.clone()
        delattr(recovery_model, "priors")
        recovery_model.register_buffer("priors", priors_copy)

        recovery_model.name = 'rfb640'
        recovery_model.eval()
        recovery_model.to(_device)
        rfbnet.model = recovery_model

    return rfbnet
def __init__(self, inputs=None, input_shape=None, output=None):
    """Initialise the detection model and its default inference settings.

    Args:
        inputs: forwarded unchanged to the parent constructor.
        input_shape: forwarded to the parent constructor; its last two entries are
            used as the resize target of the preprocessing pipeline.
        output: forwarded unchanged to the parent constructor.
    """
    super(YoloDetectionModel, self).__init__(inputs, input_shape, output)

    # NOTE(review): ``input_shape`` defaults to None but is indexed here, so callers
    # apparently must always supply it - confirm.
    resize_target = (input_shape[-2], input_shape[-1])
    resize_step = Resize(resize_target, True)
    scale_step = Normalize(0, 255)
    self.preprocess_flow = [resize_step, scale_step]

    self.detection_threshold = 0.5
    self.nms_threshold = 0.3
    self.class_names = None
    self.palette = generate_palette(80)
def DenseNetFcn(blocks=(4, 5, 7, 10, 12), growth_rate=16, initial_filters=64, pretrained=False,
                input_shape=(224, 224, 3), num_classes=10, name='', **kwargs):
    """Build a DenseNet-FCN network wrapped in an ``ImageSegmentationModel``.

    Args:
        blocks (tuple/list of int): forwarded to ``_DenseNetFcn2`` as ``blocks``.
        growth_rate (int): forwarded to ``_DenseNetFcn2``.
        initial_filters (int): forwarded to ``_DenseNetFcn2``.
        pretrained (bool): not referenced by this function.
        input_shape (tuple or list): shape handed to the model wrapper; entries 2 and 1
            (in that order) become the resize target of the preprocessing pipeline.
        num_classes (int): forwarded to ``_DenseNetFcn2``.
        name (string): forwarded to ``_DenseNetFcn2``.
        **kwargs: forwarded to ``_DenseNetFcn2``.

    Returns:
        The configured ``ImageSegmentationModel`` instance.
    """
    network = _DenseNetFcn2(blocks=blocks,
                            growth_rate=growth_rate,
                            initial_filters=initial_filters,
                            num_classes=num_classes,
                            name=name,
                            **kwargs)
    model = ImageSegmentationModel(input_shape=input_shape, output=network)

    signature = Signature(name='DenseNetFcn')
    model.signature = signature
    # Tensor specs are only recorded when the wrapper exposes tensor-valued shapes.
    if is_tensor(model._input_shape):
        model.signature.inputs['input'] = TensorSpec(shape=model._input_shape, name='input')
    if is_tensor(model._output_shape):
        model.signature.outputs['output'] = TensorSpec(shape=model._output_shape, name='output')

    # NOTE(review): with the default input_shape (224, 224, 3) this resize target is (3, 224);
    # the indexing looks written for a channels-first shape - confirm which layout is intended.
    resize_target = (input_shape[2], input_shape[1])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        Normalize(0, 255),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return model
def InceptionV3(include_top=True, pretrained=True, freeze_features=False, input_shape=(3, 224, 224), classes=1000,
                **kwargs):
    """Build an Inception-v3 network wrapped in an ``ImageClassificationModel``.

    Args:
        include_top: forwarded to ``inception_v3``.
        pretrained: not referenced by the live code of this function.
        freeze_features: not referenced by the live code of this function.
        input_shape: a 3-element shape is converted to a tuple, anything else falls back
            to ``(3, 224, 224)``.
        classes: forwarded to ``inception_v3`` as ``num_classes``.
        **kwargs: accepted and ignored.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """
    if input_shape is None or len(input_shape) != 3:
        input_shape = (3, 224, 224)
    else:
        input_shape = tuple(input_shape)

    backbone = inception_v3(input_shape, model_name='inception_v3', include_top=include_top, num_classes=classes)
    model = ImageClassificationModel(input_shape=input_shape, output=backbone)

    # Class names come from the label file stored next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    labels_path = os.path.join(module_dir, 'imagenet_labels1.txt')
    with open(labels_path, 'r', encoding='utf-8-sig') as label_file:
        labels = [line.rstrip() for line in label_file]
    model.class_names = labels

    resize_target = (input_shape[2], input_shape[1])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        Normalize(0, 255),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return model
def SEResNet_IR(include_top=True, num_layers=50, Bottleneck=BottleNeck_IR_SE, drop_ratio=0.4, feature_dim=128,
                input_shape=(3, 112, 112)):
    """Build an SE-ResNet-IR embedding network wrapped in a ``FaceRecognitionModel``.

    Args:
        include_top: not referenced by this function.
        num_layers: not referenced by this function (the stage depths are fixed at 3/4/14/3).
        Bottleneck: block type handed to ``get_block`` for every stage.
        drop_ratio: rate given to the ``Dropout`` layer of the output head.
        feature_dim: unit count given to the ``Dense`` layer of the output head.
        input_shape: shape handed to the model wrapper; entries 1 and 2 form the resize target.

    Returns:
        The configured ``FaceRecognitionModel`` instance.
    """
    stem = Conv2d_Block((3, 3), 64, strides=1, auto_pad=True, use_bias=False, activation=PRelu(64),
                        normalization='batch', name='input_layer')

    # The four stages are built in order and concatenated into a single body.
    stage1 = get_block(Bottleneck, out_channel=64, num_units=3, keep_filter=True)
    stage2 = get_block(Bottleneck, out_channel=128, num_units=4, keep_filter=False)
    stage3 = get_block(Bottleneck, out_channel=256, num_units=14, keep_filter=False)
    stage4 = get_block(Bottleneck, out_channel=512, num_units=3, keep_filter=False)
    body = Sequential(stage1 + stage2 + stage3 + stage4)

    head = Sequential(
        BatchNorm2d(),
        Dropout(drop_ratio),
        Flatten(),
        Dense(feature_dim),
        BatchNorm(),
        name='output_layer'
    )

    blocks = OrderedDict()
    blocks['input_layer'] = stem
    blocks['body'] = body
    blocks['output_layer'] = head

    facenet = Sequential(blocks).to(_device)
    facenet.name = camel2snake('SEResNet_IR')

    model = FaceRecognitionModel(input_shape=input_shape, output=facenet)
    resize_target = (input_shape[1], input_shape[2])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        Normalize(0, 255),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return model
def make_vgg_layers(cfg, num_classes=1000, input_shape=(224, 224, 3), include_top=True):
    """Assemble a VGG-style network from a layer configuration.

    Args:
        cfg: iterable describing the feature extractor. The string ``'M'`` adds a
            max-pooling layer and starts a new block; any other entry is passed to
            ``Conv2d`` as its second argument (presumably the filter count) and is
            followed by a ReLU.
        num_classes: unit count of the final ``Dense`` layer when ``include_top`` is set.
        input_shape: shape handed to the model wrapper; entries 0 and 1 form the
            resize target of the preprocessing pipeline.
        include_top: when truthy, the flatten / fully-connected / softmax head is appended.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """
    vgg = Sequential()
    block = 1
    conv = 1
    for v in cfg:
        if v == 'M':
            pool_name = 'block{0}_pool'.format(block)
            vgg.add_module(pool_name, MaxPool2d(kernel_size=2, strides=2, use_bias=True, name=pool_name))
            block += 1
            conv = 1
            continue

        # The original code branched on ``len(vgg) == 0`` but both branches were identical,
        # so a single construction path is used.
        conv_name = 'block{0}_conv{1}'.format(block, conv)
        relu_name = 'block{0}_relu{1}'.format(block, conv)
        vgg.add_module(conv_name,
                       Conv2d((3, 3), v, auto_pad=True, activation=None, use_bias=True, name=conv_name))
        vgg.add_module(relu_name, Relu(name=relu_name))
        conv += 1

    if include_top:
        vgg.add_module('flattened', Flatten())
        vgg.add_module('fc1', Dense(4096, use_bias=True, activation='relu'))
        vgg.add_module('drop1', Dropout(0.5))
        vgg.add_module('fc2', Dense(4096, use_bias=True, activation='relu'))
        vgg.add_module('drop2', Dropout(0.5))
        vgg.add_module('fc3', Dense(num_classes, use_bias=True, activation='softmax'))

    model = ImageClassificationModel(input_shape=input_shape, output=vgg)
    model.signature = get_signature(model.model.forward)

    # Class names come from the label file stored next to this module.
    labels_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'imagenet_labels1.txt')
    with open(labels_path, 'r', encoding='utf-8-sig') as f:
        model.class_names = [line.rstrip() for line in f]

    # Preprocessing: resize, convert to BGR, then apply Normalize with these per-channel constants.
    model.preprocess_flow = [
        Resize((input_shape[0], input_shape[1]), keep_aspect=True),
        to_bgr(),
        Normalize([103.939, 116.779, 123.68], [1, 1, 1]),
    ]
    return model
def MobileNet(input_shape=(224, 224, 3), classes=1000, use_bias=False, width_mult=1.0, round_nearest=8,
              include_top=True, model_name='', **kwargs):
    """Build a MobileNet-style network wrapped in an ``ImageClassificationModel``.

    Args:
        input_shape: shape handed to the model wrapper.
        classes: unit count of the final ``Dense`` layer when ``include_top`` is set.
        use_bias: not referenced by this function.
        width_mult: multiplier applied to every channel count before rounding.
        round_nearest: divisor passed to ``_make_divisible`` when rounding channel counts.
        include_top: when truthy, pooling, dropout, the classifier and softmax are appended.
        model_name: not referenced by this function (the network is named 'mobilenet').
        **kwargs: accepted and ignored.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """
    mobilenet = Sequential(name='mobilenet')

    # Each row: expansion factor, base channels, repeat count, stride of the first repeat.
    stage_settings = [
        [1, 16, 1, 1],
        [6, 24, 2, 2],
        [6, 32, 3, 2],
        [6, 64, 4, 2],
        [6, 96, 3, 1],
        [6, 160, 3, 2],
        [6, 320, 1, 1],
    ]

    input_filters = _make_divisible(32 * width_mult, round_nearest)
    last_filters = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

    features = [
        Conv2d_Block((3, 3), num_filters=input_filters, strides=2, auto_pad=True, padding_mode='zero',
                     normalization='batch', activation='relu6', name='first_layer')
    ]
    for expansion, channels, repeats, first_stride in stage_settings:
        output_filters = _make_divisible(channels * width_mult, round_nearest)
        for repeat_idx in range(repeats):
            # Only the first unit of a stage uses the stage stride.
            if repeat_idx == 0:
                strides = first_stride
            else:
                strides = 1
            features.append(
                inverted_residual(input_filters, num_filters=output_filters, strides=strides,
                                  expansion=expansion, name='irb_{0}'.format(repeat_idx)))
            input_filters = output_filters
    features.append(
        Conv2d_Block((1, 1), last_filters, auto_pad=True, padding_mode='zero', normalization='batch',
                     activation='relu6', name='last_layer'))

    mobilenet.add_module('features', Sequential(*features, name='features'))
    if include_top:
        head_layers = (
            ('gap', GlobalAvgPool2d()),
            ('drop', Dropout(0.2)),
            ('fc', Dense((classes), activation=None)),
            ('softmax', SoftMax(name='softmax')),
        )
        for layer_name, layer in head_layers:
            mobilenet.add_module(layer_name, layer)

    model = ImageClassificationModel(input_shape=input_shape, output=mobilenet)

    # Class names come from the label file stored next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, 'imagenet_labels1.txt'), 'r', encoding='utf-8-sig') as label_file:
        labels = [line.rstrip() for line in label_file]
    model.class_names = labels

    # The resize target is fixed at 224x224 regardless of ``input_shape``.
    model.preprocess_flow = [Resize((224, 224), keep_aspect=True), Normalize(127.5, 127.5)]
    return model
def EfficientNet(width_coefficient, depth_coefficient, input_shape, dropout_rate=0.2, drop_connect_rate=0.2,
                 depth_divisor=8, model_name='efficientnet', include_top=True, num_classes=1000, **kwargs):
    """Build an EfficientNet network from the given scaling coefficients.

    Args:
        width_coefficient: float, multiplier applied to every filter count.
        depth_coefficient: float, multiplier applied to every stage's repeat count.
        input_shape: shape handed to the model wrapper; entries 2 and 1 (in that order)
            form the resize target of the preprocessing pipeline.
        dropout_rate: float, rate of the dropout layer before the classifier; the layer
            is skipped when the rate is not positive.
        drop_connect_rate: float, maximum drop-connect rate; each block receives this
            value scaled by its position in the network.
        depth_divisor: integer, filter counts are rounded to a multiple of this value.
        model_name: string, name given to the ``Sequential`` container.
        include_top: whether to append dropout, the classifier and softmax.
        num_classes: unit count of the classifier layer.
        **kwargs: accepted and ignored.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """
    stage_configs = deepcopy(DEFAULT_BLOCKS_ARGS)

    def round_filters(filters, divisor=depth_divisor):
        """Scale a filter count by the width coefficient and round it to ``divisor``."""
        scaled = filters * width_coefficient
        rounded = builtins.max(divisor, int(scaled + divisor / 2) // divisor * divisor)
        # Rounding down must not lose more than 10% of the scaled value.
        if rounded < 0.9 * scaled:
            rounded += divisor
        return int(rounded)

    def round_repeats(repeats):
        """Scale a repeat count by the depth coefficient, rounding up."""
        return int(math.ceil(depth_coefficient * repeats))

    efficientnet = Sequential(name=model_name)
    efficientnet.add_module(
        'stem',
        Conv2d_Block((3, 3), round_filters(32), strides=2, use_bias=False, auto_pad=True, padding_mode='zero',
                     normalization='batch', activation='swish', name='stem'))

    blocks_built = 0
    # The denominator uses the unscaled repeat counts from the block configuration.
    total_blocks = float(builtins.sum(cfg['repeats'] for cfg in stage_configs))
    for stage_idx, cfg in enumerate(stage_configs):
        assert cfg['repeats'] > 0
        repeat_count = round_repeats(cfg.pop('repeats'))
        for repeat_idx in range(repeat_count):
            # Only the first block of a stage changes stride and filter count.
            if repeat_idx > 0:
                cfg['strides'] = 1
                cfg['filters_in'] = cfg['filters_out']
            block_key = 'block{}{}'.format(stage_idx + 1, chr(repeat_idx + 97))
            block = efficient_block(expand_ratio=cfg['expand_ratio'],
                                    filters_in=round_filters(cfg['filters_in']),
                                    filters_out=round_filters(cfg['filters_out']),
                                    kernel_size=cfg['kernel_size'],
                                    strides=cfg['strides'],
                                    zero_pad=0,
                                    se_ratio=cfg['se_ratio'],
                                    drop_connect_rate=drop_connect_rate * blocks_built / total_blocks,
                                    name='block{}{}_'.format(stage_idx + 1, chr(repeat_idx + 97)))
            efficientnet.add_module(block_key, block)
            blocks_built += 1

    efficientnet.add_module(
        'top_conv',
        Conv2d_Block((1, 1), round_filters(1280), strides=1, use_bias=False, auto_pad=True, padding_mode='zero',
                     normalization='batch', activation='swish', name='top_conv'))
    efficientnet.add_module('avg_pool', GlobalAvgPool2d(name='avg_pool'))

    if include_top:
        if dropout_rate > 0:
            efficientnet.add_module('top_dropout', Dropout(dropout_rate, name='top_dropout'))
        efficientnet.add_module('fc', Dense(num_classes, activation=None, name='fc'))
        efficientnet.add_module('softmax', SoftMax(name='softmax'))

    model = ImageClassificationModel(input_shape=input_shape, output=efficientnet)

    # Class names come from the label file stored next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, 'imagenet_labels1.txt'), 'r', encoding='utf-8-sig') as label_file:
        labels = [line.rstrip() for line in label_file]
    model.class_names = labels

    resize_target = (input_shape[2], input_shape[1])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        Normalize(0, 255),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return model
def forward(self, x, scale):
    """Run the three-stage cascade (pnet, rnet, onet) on one image at one scale.

    Args:
        x: input image. It is indexed as ``x[rows, cols, :]`` and its first two
            ``shape`` entries are used as the image size, so a height/width/channel
            layout is assumed - TODO confirm against the caller.
        scale: factor the pnet box coordinates are divided by.

    Returns:
        The boxes produced by the last stage that ran. After the onet stage the
        landmark x and y coordinates are concatenated onto each box. When pnet
        yields nothing, its raw output is returned unchanged.
    """

    def _crop_batch(candidates, size):
        # Cut every candidate box out of the image and resize it to ``size`` x ``size``;
        # boxes with an empty crop keep an all-zero entry.
        batch = np.zeros((candidates.shape[0], 3, size, size))
        for k in range(candidates.shape[0]):
            box = candidates[k]
            crop_img = x.copy()[int(box[1]):int(box[3]), int(box[0]):int(box[2]), :]
            if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                batch[k] = Resize((size, size))(crop_img / 255.0).transpose([2, 0, 1])
        return to_tensor(batch)

    # Fixed typo: the original called ``x.exand_dims(0)``, which cannot resolve.
    # NOTE(review): assumes ``x`` exposes an ``expand_dims`` method - confirm.
    inp = x.expand_dims(0)
    boxes = self.pnet(inp)
    boxes_list = []
    if boxes is not None and len(boxes) > 0:
        box = boxes[:, :4] / scale
        score = boxes[:, 4:]
        boxes = concate([box.round_(), score], axis=1)
        if len(boxes) > 0:
            boxes_list.append(boxes)

    # ---- pnet finished ----
    if len(boxes_list) > 0:
        boxes = to_tensor(concate(boxes_list, axis=0))
        boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
        boxes = nms(boxes, threshold=self.detection_threshold[0])
        print('pnet:{0} boxes '.format(len(boxes)))

        if boxes is not None:
            # Prepare the rnet input: 24x24 crops of every surviving box.
            boxes = self.rerec(boxes, x.shape)
            new_arr = _crop_batch(boxes, 24)

            if len(new_arr) > 16:
                # Feed rnet in chunks of 16 crops and stitch the outputs back together.
                r_output1_list = []
                r_output2_list = []
                r_output3_list = []
                for start in range(0, len(new_arr), 16):
                    r_out1, r_out2, r_out3 = self.rnet(new_arr[start:start + 16, :, :, :])
                    r_output1_list.append(r_out1)
                    r_output2_list.append(r_out2)
                    r_output3_list.append(r_out3)
                r_out1 = concate(r_output1_list, axis=0)
                r_out2 = concate(r_output2_list, axis=0)
                r_out3 = concate(r_output3_list, axis=0)
            else:
                r_out1, r_out2, r_out3 = self.rnet(new_arr)

            probs = to_numpy(r_out1)
            keep = np.where(probs[:, 0] > self.detection_threshold[1])[0]
            r_out1 = r_out1[keep]
            boxes = boxes[keep]
            boxes[:, 4] = r_out1[:, 0]
            r_out2 = r_out2[keep]
            boxes = calibrate_box(boxes, r_out2)

            # ---- rnet finished ----
            boxes = nms(boxes, threshold=self.detection_threshold[1], image_size=(x.shape[0], x.shape[1]),
                        min_size=self.min_size)
            print('rnet:{0} boxes '.format(len(boxes)))
            boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
            boxes = self.rerec(boxes, x.shape)

            # Prepare the onet input: 48x48 crops of every surviving box.
            new_arr = _crop_batch(boxes, 48)
            o_out1, o_out2, o_out3 = self.onet(new_arr)
            probs = to_numpy(o_out1)
            keep = np.where(probs[:, 0] > self.detection_threshold[2])[0]
            o_out1 = o_out1[keep]
            boxes = boxes[keep]
            boxes[:, 4] = o_out1[:, 0]
            o_out2 = o_out2[keep]
            o_out3 = o_out3[keep]
            boxes = calibrate_box(boxes, o_out2)

            # Landmarks: box corner plus the onet offset (even/odd columns) times box size + 1.
            landmarks_x = boxes[:, 0:1] + o_out3[:, 0::2] * (boxes[:, 2:3] - boxes[:, 0:1] + 1)
            landmarks_y = boxes[:, 1:2] + o_out3[:, 1::2] * (boxes[:, 3:4] - boxes[:, 1:2] + 1)
            boxes = concate([boxes, landmarks_x, landmarks_y], axis=-1)

    # The original computed ``boxes`` but never returned it.
    return boxes
def ResNet(block, layers, input_shape=(224, 224, 3), num_classes=1000, use_bias=False, include_top=True,
           model_name='', **kwargs):
    """Build a ResNet-style network wrapped in an ``ImageClassificationModel``.

    Args:
        block: callable that creates one residual unit; it is invoked with
            ``num_filters``, ``strides``, ``expansion``, ``conv_shortcut``, ``use_bias`` and ``name``.
        layers: sequence whose first four entries give the unit count of each stage.
        input_shape: shape handed to the model wrapper; entries 0 and 1 form the
            resize target of the preprocessing pipeline.
        num_classes: unit count of the classifier layer when ``include_top`` is set.
        use_bias: forwarded to the first convolution block and to every residual unit.
        include_top: whether to append the classifier and softmax layers.
        model_name: string stored as the network's ``_name``.
        **kwargs: accepted and ignored.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """

    def _make_layer(block, num_filters, blocklayers, strides=1, dilate=False, use_bias=use_bias, layer_name=''):
        """Stack ``blocklayers`` residual units into one named stage (``dilate`` is unused)."""
        units = OrderedDict()
        # The first unit carries the stage stride and a convolutional shortcut.
        units['0'] = block(num_filters=num_filters, strides=strides, expansion=4, conv_shortcut=True,
                           use_bias=use_bias, name=layer_name + '1')
        for unit_idx in range(1, blocklayers):
            units['{0}'.format(unit_idx)] = block(num_filters=num_filters, strides=1, expansion=4,
                                                  conv_shortcut=False, use_bias=use_bias,
                                                  name=layer_name + '{0}'.format(unit_idx + 1))
        stage = Sequential(units)
        stage._name = layer_name
        return stage

    resnet = Sequential()
    resnet.add_module('conv1',
                      Conv2d_Block((7, 7), 64, strides=2, use_bias=use_bias, auto_pad=True, padding_mode='zero',
                                   normalization='batch', activation='relu', name='first_block'))
    resnet.add_module('maxpool', (MaxPool2d((3, 3), strides=2, auto_pad=True, padding_mode='zero')))

    # Four stages: (filters, stride of the first unit).
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_idx, (stage_filters, stage_stride) in enumerate(stage_specs):
        stage_name = 'layer{0}'.format(stage_idx + 1)
        resnet.add_module(stage_name,
                          (_make_layer(block, stage_filters, layers[stage_idx], strides=stage_stride, dilate=None,
                                       use_bias=use_bias, layer_name=stage_name)))

    resnet.add_module('avg_pool', GlobalAvgPool2d(name='avg_pool'))
    if include_top:
        resnet.add_module('fc', Dense(num_classes, activation=None, name='fc'))
        resnet.add_module('softmax', SoftMax(name='softmax'))
    resnet._name = model_name

    model = ImageClassificationModel(input_shape=input_shape, output=resnet)

    # Class names come from the label file stored next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, 'imagenet_labels1.txt'), 'r', encoding='utf-8-sig') as label_file:
        labels = [line.rstrip() for line in label_file]
    model.class_names = labels

    input_np_shape = to_numpy(input_shape)
    resize_target = (input_np_shape[0], input_np_shape[1])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        to_bgr(),
        Normalize([103.939, 116.779, 123.68], [1, 1, 1]),
    ]
    return model
def DenseNet(blocks, growth_rate=32, initial_filters=64, include_top=True, pretrained=True,
             input_shape=(224, 224, 3), num_classes=1000, name='', **kwargs):
    """Build a DenseNet network wrapped in an ``ImageClassificationModel``.

    Args:
        blocks (tuple/list of int): the first four entries are passed, one each, to the
            four ``DenseBlock`` stages.
        growth_rate (int): forwarded to every ``DenseBlock``.
        initial_filters (int): filter count of the first convolution block.
        include_top (bool): whether to append the classifier and softmax layers.
        pretrained (bool): not referenced by this function.
        input_shape (tuple or list): shape handed to the model wrapper; entries 0 and 1
            form the resize target of the preprocessing pipeline.
        num_classes (int): unit count of the classifier layer.
        name (string): name assigned to the network.
        **kwargs: accepted and ignored.

    Returns:
        The configured ``ImageClassificationModel`` instance.
    """
    densenet = Sequential()
    densenet.add_module(
        'conv1/conv',
        Conv2d_Block((7, 7), num_filters=initial_filters, strides=2, use_bias=False, auto_pad=True,
                     padding_mode='zero', activation='relu', normalization='batch', name='conv1/conv'))
    densenet.add_module('maxpool', (MaxPool2d((3, 3), strides=2, auto_pad=True, padding_mode='zero')))

    # Four dense blocks; each one except the last is followed by a transition layer.
    stage_total = 4
    for stage_idx in range(stage_total):
        stage_no = stage_idx + 1
        densenet.add_module('denseblock{0}'.format(stage_no),
                            DenseBlock(blocks[stage_idx], growth_rate=growth_rate))
        if stage_no < stage_total:
            densenet.add_module('transitiondown{0}'.format(stage_no), Transition(0.5))

    densenet.add_module('classifier_norm', BatchNorm2d(name='classifier_norm'))
    densenet.add_module('classifier_relu', Relu(name='classifier_relu'))
    densenet.add_module('avg_pool', GlobalAvgPool2d(name='avg_pool'))
    if include_top:
        densenet.add_module('classifier', Dense(num_classes, activation=None, name='classifier'))
        densenet.add_module('softmax', SoftMax(name='softmax'))
    densenet.name = name

    model = ImageClassificationModel(input_shape=input_shape, output=densenet)

    # Class names come from the label file stored next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, 'imagenet_labels1.txt'), 'r', encoding='utf-8-sig') as label_file:
        labels = [line.rstrip() for line in label_file]
    model.class_names = labels

    resize_target = (input_shape[0], input_shape[1])
    model.preprocess_flow = [
        Resize(resize_target, keep_aspect=True),
        Normalize(0, 255),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return model