Example 1
import torch
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection._utils import overwrite_eps


def get_model(num_classes, device="cuda:0"):

    # build the backbone without loading the official pretrained weights here
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained=False,
                                   trainable_layers=3)
    model = FasterRCNN(backbone, num_classes=91)  # keep num_classes=91 (COCO) so the official checkpoint loads

    # load the official fasterrcnn_resnet50_fpn checkpoint
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    weights_dict = torch.load(
        "C:/Users/lixiao/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
        map_location=device)
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict,
                                                          strict=False)
    overwrite_eps(model, 0.0)

    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, num_classes)  # swap in a new predictor head

    return model
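
A minimal usage sketch for the helper above (hypothetical class count and dummy input; it assumes the hard-coded checkpoint path inside get_model exists on the machine):

# Hypothetical driver: a detector with 2 foreground classes + background.
model = get_model(num_classes=3, device="cpu")
model.eval()
with torch.no_grad():
    # torchvision detection models take a list of CHW float tensors
    outputs = model([torch.rand(3, 480, 640)])
print(outputs[0]["boxes"].shape, outputs[0]["scores"].shape)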
Example 2
    def __init__(self, num_classes):
        self.model_dir = FLAGS.model_path
        self.cnn_net = Vgg16()

        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, num_classes, batch_size=BATCH_SIZE, is_training=False)
            self.faster_rcnn.build(mode='predict')
        self._initialize()
Example 3
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection._utils import overwrite_eps


def get_model(num_classes, device="cuda:0"):

    # # https://download.pytorch.org/models/vgg16-397923af.pth
    # # To use MobileNetV2 instead, download its pretrained weights, comment out
    # # the three VGG lines below, and uncomment the two MobileNetV2 lines.
    # vgg_feature = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    # backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the final maxpool layer from features
    # backbone.out_channels = 512

    # # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    # # backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    # # backbone.out_channels = 1280  # channels of the backbone's output feature map

    # anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                     aspect_ratios=((0.5, 1.0, 2.0),))

    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool from
    #                                                 output_size=[7, 7],   # output size of RoI pooling
    #                                                 sampling_ratio=2)  # sampling ratio

    # model = FasterRCNN(backbone=backbone,
    #                    num_classes=num_classes,
    #                    rpn_anchor_generator=anchor_generator,
    #                    box_roi_pool=roi_pooler)

    # build the backbone without loading the official pretrained weights here
    backbone = resnet_fpn_backbone('resnet50',
                                   pretrained=False,
                                   trainable_layers=3)
    model = FasterRCNN(backbone, num_classes=91)  # keep num_classes=91 (COCO) so the official checkpoint loads

    # load the official fasterrcnn_resnet50_fpn checkpoint
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    weights_dict = torch.load(
        "C:/Users/lixiao/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
        map_location=device)
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict,
                                                          strict=False)
    overwrite_eps(model, 0.0)

    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_features, num_classes)  # swap in a new predictor head

    return model
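
For reference, the commented-out custom-backbone path above follows the standard torchvision pattern; a self-contained sketch of it with a MobileNetV2 backbone (num_classes=2 is an arbitrary example value):

import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# a backbone returning a single feature map; FasterRCNN needs out_channels set
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)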
Example 4
    def __init__(self, imdb, roidb, val_imdb, val_roidb, pretrain_model):
        self.imdb = imdb
        self.roidb = roidb
        self.val_imdb = val_imdb
        self.val_roidb = val_roidb
        self.pretrain_model = pretrain_model
        self.model_dir = FLAGS.model_path
        self.log_dir = FLAGS.log_path
        self.val_log_dir = FLAGS.val_log_path

        self.cnn_net = Vgg16()
        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, self.imdb.num_classes, batch_size=config.BATCH_SIZE, is_training=True)
            self.faster_rcnn.build(mode='train')

        variables = tf.global_variables()
        print ("all var:",variables)
Example 5
def main():
    args = arguments.parse_args()

    dataset = VOCDataSet("data", args.dataset, enabled_flip=False)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=5,
                            collate_fn=collate_fn)

    print("[+] Bootstrapping model")
    print("[+] Loading rpn model from %s" % args.rpn)
    print("[+] Loading fast rcnn model from %s" % args.fastrcnn)
    fast_rcnn_checkpoint = torch.load(args.fastrcnn)
    rpn_checkpoint = torch.load(args.rpn)
    fast_rcnn = FasterRCNN().cuda()
    fast_rcnn.load_state_dict(fast_rcnn_checkpoint["state_dict"])
    fast_rcnn.eval()
    rpn = RegionProposalNetwork().cuda()
    rpn.load_state_dict(rpn_checkpoint["state_dict"])

    print("[+] Calculating Average Precision")
    ap = get_ap(rpn,
                fast_rcnn,
                dataset,
                dataloader,
                name="%s_%s" % (args.dataset, args.name))
    print("Average Precision=%.3f" % ap)
Example 6
class Predict():
    def __init__(self, num_classes):
        self.model_dir = FLAGS.model_path
        self.cnn_net = Vgg16()

        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, num_classes, batch_size=BATCH_SIZE, is_training=False)
            self.faster_rcnn.build(mode='predict')
        self._initialize()


    def predict(self, image, im_info):
        """Run inference with the Faster R-CNN network."""
        return self._predict(image, im_info)

    def _initialize(self):
        tf.set_random_seed(1234)
        random.seed(1234)

        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True

        self.sess = tf.Session(config=tfconfig)

        self.saver = tf.train.Saver(max_to_keep=100000)
        try:
            checkpoint_dir = self.model_dir
            print("Trying to restore last checkpoint ...:",checkpoint_dir)
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
            self.saver.restore(self.sess, save_path=last_chk_path)
            print("restore last checkpoint %s done"%checkpoint_dir)
        except Exception as e:
            print(e)
            raise e

    def _predict(self, image, im_info):
        return self.faster_rcnn.predict(self.sess, image, im_info)
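
A hedged sketch of driving this class (FLAGS, BATCH_SIZE, Vgg16, and the TF1 FasterRCNN come from the surrounding project; the preprocessing and the exact return value of predict() are assumptions):

import cv2
import numpy as np

predictor = Predict(num_classes=21)  # e.g. 20 VOC classes + background

img = cv2.imread("demo.jpg").astype(np.float32)
# im_info in this style of codebase is typically (height, width, scale)
im_info = np.array([[img.shape[0], img.shape[1], 1.0]], dtype=np.float32)
result = predictor.predict(np.expand_dims(img, 0), im_info)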
Example 7
def train():
    """ train """
    print('execute train')

    # TODO
    train_inputs = None
    train_teachers = None
    test_inputs = None
    test_teachers = None

    anchors = get_default_anchors()
    train_taegets = [TrainTarget.BACKBONE, TrainTarget.RPN, TrainTarget.HEAD]

    network = FasterRCNN(
        INPUT_SHAPE,
        2,
        anchors,
        # train_taegets=None, is_predict=True  (prediction-mode alternative)
        train_taegets=train_taegets,
        is_predict=False)
    model = network.get_model_with_default_compile()
    network.draw_model_summary(
        file_name=os.path.join(os.pardir, 'ModelLayers.png'))
Example 8
	def __init__(self, nhidden,
				 n_object_cats,
				 n_predicate_cats,
				 MPS_iter,
				 object_loss_weight,
				 predicate_loss_weight,
				 dropout=False,
				 use_kmeans_anchors=True,
				 base_model='vgg'):

		super(Hierarchical_Descriptive_Model, self).__init__(nhidden, n_object_cats, n_predicate_cats,  MPS_iter, object_loss_weight,
				 predicate_loss_weight, dropout)

		self.dropout = dropout
		# self.rpn = RPN(use_kmeans_anchors)
		self.rcnn = FasterRCNN(nhidden, use_kmeans_anchors, n_object_cats, model=base_model)
		# self.roi_pool_object = RoIPool(7, 7, 1.0/16)
		self.roi_pool_phrase = RoIAlign(7, 7, 1.0/16)
		if base_model == 'vgg':
			# self.fc6 = FC(512*7*7, nhidden)
			self.fc6_phrase = FC(512*7*7, nhidden, relu=True)
		elif base_model == 'resnet50' or base_model == 'resnet101':
			# self.fc6 = FC(1024*7*7, nhidden)
			self.fc6_phrase = FC(1024*7*7, nhidden, relu=True)
		else:
			print('please choose a model')
		# self.fc7 = FC(nhidden, nhidden, relu=True)
		self.fc7_phrase = FC(nhidden, nhidden, relu=True)
		self.spacial_conv = SpacialConv(pooling_size=32)

		if MPS_iter == 0:
			self.mps = None
		else:
			self.mps = Hierarchical_Message_Passing_Structure(nhidden, n_object_cats, n_predicate_cats) # the hierarchical message passing structure
			network.weights_normal_init(self.mps, 0.01)

		# self.score_fc = FC(nhidden, self.n_classes_obj, relu=False)
		# self.bbox_fc = FC(nhidden, self.n_classes_obj * 4, relu=False)
		self.score_fc_pred = FC(nhidden+64, self.n_classes_pred, relu=False)
		# self.bbox_pred_fc = FC(nhidden, self.n_classes_pred * 4, relu=False)


		# network.weights_normal_init(self.score_fc, 0.01)
		# network.weights_normal_init(self.bbox_fc, 0.005)
		network.weights_normal_init(self.score_fc_pred, 0.01)
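
A hedged construction sketch for this model; the category counts and loss weights are placeholder values, and the class plus its dependencies come from the scene-graph project this snippet belongs to:

model = Hierarchical_Descriptive_Model(nhidden=512,
                                       n_object_cats=151,   # placeholder object-class count
                                       n_predicate_cats=51,  # placeholder predicate count
                                       MPS_iter=2,
                                       object_loss_weight=1.0,
                                       predicate_loss_weight=1.0,
                                       dropout=False,
                                       use_kmeans_anchors=True,
                                       base_model='vgg')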
Example 9
def main():
    global args
    args = arguments.parse_args()
    experiment_env = create_experiment_dir()

    assert args.rois_per_batch % args.batch_size == 0, "Uneven number of rois per image"
    rois_per_image = args.rois_per_batch // args.batch_size
    train_data = VOCDataSetROIs("data",
                                "train",
                                rois_per_image,
                                enabled_flip=True)
    dataloader = DataLoader(train_data,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=5,
                            collate_fn=collate_rois_fn)

    ################### MODEL BOOTSRAP #####################
    print("[+] Bootstrapping model")
    if args.stage_2_path is not None:
        print("[+] Loading stage 2 weights")
    net = FasterRCNN(args.stage_2_path).cuda()
    net.train()

    if args.resume is not None:
        print("[+] Resuming from %s" % args.resume)
        checkpoint = torch.load(args.resume)
        net.load_state_dict(checkpoint["state_dict"])

    cross_entropy = CrossEntropyLoss(size_average=True).cuda()
    smooth_l1_loss = SmoothL1Loss(size_average=False).cuda()
    optimizer = opt.SGD(
        [params for params in net.parameters() if params.requires_grad],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=0.0005)

    ################### MODEL TRAINING #####################
    print("[+] Training model")
    start_epoch = 0 if args.resume is None else checkpoint["epoch"]
    for epoch in range(start_epoch, args.epoch):
        adjust_learning_rate(optimizer, epoch)
        train(net, cross_entropy, smooth_l1_loss, optimizer, dataloader,
              experiment_env, epoch)
Example 10
from faster_rcnn import FasterRCNN

if __name__ == '__main__':
    test_data = VOCDetectionDataset(mode='train',
                                    use_cache=True,
                                    year='2007',
                                    bgr=True)
    wrappers = [
        lambda d: SubtractWrapper(d,
                                  value=np.array([103.939, 116.779, 123.68])),
        lambda d: ResizeWrapper(d,
                                preprocess_idx=0,
                                output_shape=output_shape_soft_min_hard_max(
                                    600, 1200),
                                hook=bbox_resize_hook(1)),
    ]
    for wrapper in wrappers:
        test_data = wrapper(test_data)

    model = FasterRCNN()
    chainer.serializers.load_npz('VGG16_faster_rcnn_final.model', model)
    trainer = mock.MagicMock()
    trainer.out = 'result'
    trainer.updater.iteration = 0

    extension = DetectionVisReport([3, 4, 5, 6, 7, 8],
                                   test_data,
                                   model,
                                   predict_func=model.predict_bboxes)
    extension(trainer)
Example 11
        self.m1 = nn.Conv2d(n, 18, 3, 1, 1)
        self.m2 = nn.Conv2d(n, 36, 3, 1, 1)

        #        normal_init(self.conv1, 0, 0.01)
        normal_init(self.m1, 0, 0.01)
        normal_init(self.m2, 0, 0.01)

    def forward(self, x):
        #        x = F.relu(self.conv1(x))
        return self.m1(x), self.m2(x)


def normal_init(m, mean, stddev, truncated=False):
    """
    weight initalizer: truncated normal and random normal.
    """
    # x is a parameter
    if truncated:
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(
            mean)  # not a perfect approximation
    else:
        m.weight.data.normal_(mean, stddev)
        m.bias.data.zero_()


FasterRCNN_model = FasterRCNN(
    features=Feature_extractor(sqz_feature_extractor),
    pooler=pooler,
    classifier=Classifier(),
    rpn=RPN(classifier=RPNClassifier(512)))
Example 12
class Solver():
    def __init__(self, imdb, roidb, val_imdb, val_roidb, pretrain_model):
        self.imdb = imdb
        self.roidb = roidb
        self.val_imdb = val_imdb
        self.val_roidb = val_roidb
        self.pretrain_model = pretrain_model
        self.model_dir = FLAGS.model_path
        self.log_dir = FLAGS.log_path
        self.val_log_dir = FLAGS.val_log_path

        self.cnn_net = Vgg16()
        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, self.imdb.num_classes, batch_size=config.BATCH_SIZE, is_training=True)
            self.faster_rcnn.build(mode='train')

        variables = tf.global_variables()
        print ("all var:",variables)

    def train_net(self,max_iters=700000):
        """Train a Faster R-CNN network."""
        roidb = filter_roidb(self.roidb)

        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True

        with tf.Session(config=tfconfig) as sess:
            self.initialize(sess, self.pretrain_model)
            self.train_model(sess, max_iters)

    def get_variables_in_checkpoint_file(self, file_name):
        from tensorflow.python import pywrap_tensorflow
        try:
            reader = pywrap_tensorflow.NewCheckpointReader(file_name)
            var_to_shape_map = reader.get_variable_to_shape_map()
            return var_to_shape_map
        except Exception as e:  # pylint: disable=broad-except
            print(str(e))
            if "corrupted compressed block contents" in str(e):
                print("It's likely that your checkpoint file has been compressed "
                      "with SNAPPY.")

    def initialize(self, sess, pretrained_model):
        tf.set_random_seed(1234)
        random.seed(1234)

        self.saver = tf.train.Saver(max_to_keep=100000)
        try:
            checkpoint_dir = self.model_dir
            print("Trying to restore last checkpoint ...:",checkpoint_dir)
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
            self.saver.restore(sess, save_path=last_chk_path)
            print("restore last checkpoint %s done"%checkpoint_dir)
        except Exception as e:
            print("Failed to restore checkpoint; initializing variables instead:", e)

            # Initial file lists are empty
            # Fresh train directly from ImageNet weights
            print('Loading initial model weights from {:s}'.format(pretrained_model))
            variables = tf.global_variables()
            # Initialize all variables first
            sess.run(tf.variables_initializer(variables, name='init'))
            var_keep_dic = self.get_variables_in_checkpoint_file(pretrained_model)
            variables_to_restore = self.cnn_net.get_variables_to_restore(variables, var_keep_dic)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, pretrained_model)
            self.cnn_net.fix_variables(sess, pretrained_model)

        self.writer = tf.summary.FileWriter(self.log_dir, sess.graph)
        self.val_writer = tf.summary.FileWriter(self.val_log_dir, sess.graph)

    def save_model(self, sess, global_step):
        self.saver.save(sess, os.path.join(self.model_dir,'cp'), global_step=global_step)
        print ("save model:",os.path.join(self.model_dir,'cp'))

    def train_model(self, sess, max_iters):
        #print "train:", self.roidb
        # Build data layers for both training and validation set
        self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
        self.val_data_layer = RoIDataLayer(self.val_roidb, self.val_imdb.num_classes)
        iter = 0
        rate = config.LEARNING_RATE
        next_step = [50000]

        global_step = sess.run(self.faster_rcnn.global_op)
        print ("start global step:",global_step)
        # Make sure the lists are not empty
        while iter < max_iters + 1:
            if global_step == 0:
                self.faster_rcnn.assign_lr(sess, rate)

            if len(next_step) > 0 and global_step == next_step[0]:
                rate *= config.GAMMA
                self.faster_rcnn.assign_lr(sess, rate)
                next_step=next_step[1:]
                print ("next step:",next_step)

            blobs = self.data_layer.forward()

            image = blobs['data']
            gt_boxes = blobs['gt_boxes']
            im_info = blobs['im_info']

            start_time = time.time()
            loss, lr, global_step, summary_str = self.faster_rcnn.train_step(sess, image, gt_boxes, im_info)
            iter+=1
            diff = time.time() - start_time
            print ("===== loss:",loss, "lr:",lr, "global step:",global_step, "time:",diff, "step:",iter)

            if iter % 100 == 0:
                self.writer.add_summary(summary_str, global_step)
                summary = tf.Summary()
                summary.value.add(tag='loss', simple_value=loss)
                summary.value.add(tag='lr', simple_value=lr)
                self.writer.add_summary(summary, global_step)

            if iter % config.SAVE_STEP == 0:
                self.save_model(sess, global_step)

                val_blobs = self.val_data_layer.forward()
                val_loss = self.faster_rcnn.get_loss(sess, val_blobs['data'], val_blobs['gt_boxes'], val_blobs['im_info'])
                print("val loss:", val_loss)
                summary = tf.Summary()
                summary.value.add(tag='loss', simple_value=val_loss)
                self.val_writer.add_summary(summary, global_step)
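
A hedged sketch of constructing and running this Solver; get_imdb and get_training_roidb are hypothetical dataset loaders in the style of the py-faster-rcnn codebase, not defined in this snippet:

imdb = get_imdb("voc_2007_trainval")  # hypothetical loader
roidb = get_training_roidb(imdb)      # hypothetical loader
val_imdb = get_imdb("voc_2007_test")
val_roidb = get_training_roidb(val_imdb)

solver = Solver(imdb, roidb, val_imdb, val_roidb,
                pretrain_model="vgg16.ckpt")  # hypothetical ImageNet checkpoint
solver.train_net(max_iters=700000)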
Example 13
def main(gpu=-1, epoch=100, batch_size=1, lr=5e-4, out='result'):
    train_data = VOCDetectionDataset(mode='train', use_cache=True, year='2007')
    test_data = VOCDetectionDataset(mode='val', use_cache=True, year='2007')

    def transform(in_data):
        img, bbox = in_data
        img -= np.array([103.939, 116.779, 123.68])[:, None, None]

        # Resize bounding box to a shape
        # with the smaller edge at least at length 600
        input_shape = img.shape[1:]
        output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
        img = transforms.resize(img, output_shape)
        bbox = transforms.bbox_resize(bbox, input_shape, output_shape)

        # horizontally flip
        img, flips = transforms.random_flip(img,
                                            horizontal_flip=True,
                                            return_flip=True)
        h_flip = flips['h']
        bbox = transforms.bbox_flip(bbox, output_shape, h_flip)
        return img, bbox

    transforms.extend(train_data, transform)
    transforms.extend(test_data, transform)

    model = FasterRCNN(gpu=gpu)
    if gpu != -1:
        model.to_gpu(gpu)
        chainer.cuda.get_device(gpu).use()
    # optimizer = chainer.optimizers.MomentumSGD(lr=lr)
    optimizer = chainer.optimizers.Adam(alpha=0.001,
                                        beta1=0.9,
                                        beta2=0.999,
                                        eps=1e-8)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=batch_size)
    updater = ParallelUpdater(train_iter, optimizer, devices={'main': gpu})

    # updater = chainer.training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    log_interval = 20, 'iteration'
    val_interval = 3000, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'main/time', 'main/rpn_loss_cls', 'main/rpn_loss_bbox',
        'main/loss_cls', 'main/loss_bbox'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # visualize training
    trainer.extend(extensions.PlotReport(['main/rpn_loss_cls'],
                                         file_name='rpn_loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/rpn_loss_bbox'],
                                         file_name='rpn_loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_cls'],
                                         file_name='loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_bbox'],
                                         file_name='loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 examples of train_data
            train_data,
            model,
            filename_base='detection_train',
            predict_func=model.predict_bboxes),
        trigger=val_interval,
        invoke_before_training=True)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 data of test_data
            test_data,
            model,
            predict_func=model.predict_bboxes),
        trigger=val_interval,
        invoke_before_training=True)

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
Example 14
def imgs_to_roi_features(imgs_paths, C, bbox_threshold, on_each_iter=None, train=False):
    """Given a list of image paths, return the RoI-pooled feature of the
    most confident objects in each image.

    Arguments:
        imgs_paths {list(file_paths)} -- file paths where the images are found
        C {Config} -- configuration object loaded from pickle

    Returns:
        {
            '<img_path>': ( list((x1, y1, x2, y2)), list((prob, class)), list(feature (7x7x512)) )
        }
    """

    if not train:
        # turn off any data augmentation
        C.use_horizontal_flips = False
        C.use_vertical_flips = False
        C.rot_90 = False

    model_frcnn = FasterRCNN()
    num_features = 512

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (VGG here, can be Resnet50, Inception, etc)
    shared_layers = model_frcnn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = model_frcnn.rpn_layer(shared_layers, num_anchors)

    classifier = model_frcnn.classifier_layer(
        feature_map_input, roi_input, C.num_rois, nb_classes=len(C.class_mapping)
    )

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    feature_extraction_input = Input(shape=(1, 4))
    roi_pooling = model_frcnn.roi_pooling_layer(
        feature_map_input, feature_extraction_input, 1, nb_classes=len(C.class_mapping)
    )
    model_roi_pooling = Model(
        [feature_map_input, feature_extraction_input], roi_pooling
    )

    try:
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)
    except Exception:
        # When calling this function from the server, given that
        # it is multithreaded, an exception is raised since the model's
        # weights were already loaded.
        # A better approach would be to create the model only once
        pass

    model_rpn.compile(optimizer="sgd", loss="mse")
    model_classifier.compile(optimizer="sgd", loss="mse")

    # Switch key value for class mapping
    class_mapping = C.class_mapping
    class_mapping = {v: k for k, v in class_mapping.items()}

    features_per_class = {}
    metadata_per_class = {}

    result = {}
    for img_path in imgs_paths:
        img = cv2.imread(img_path)
        X, ratio = format_img(img, C)

        X = np.transpose(X, (0, 2, 3, 1))

        # get output layer Y1, Y2 from the RPN and the feature maps F
        # Y1: y_rpn_cls
        # Y2: y_rpn_regr
        [Y1, Y2, F] = model_rpn.predict(X)

        # Get bboxes by applying NMS
        # R.shape = (300, 4)
        R = model_frcnn.rpn_to_roi(
            Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7
        )

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        feature_img_box_mapping = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk : C.num_rois * (jk + 1), :], axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, : curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1] :, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            # Calculate bboxes coordinates on resized image
            for ii in range(P_cls.shape[1]):
                # Ignore 'bg' class
                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]
                ) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num : 4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = model_frcnn.apply_regr(x, y, w, h, tx, ty, tw, th)
                except Exception:
                    pass
                feature_img_box_mapping[
                    (
                        C.rpn_stride * x,
                        C.rpn_stride * y,
                        C.rpn_stride * (x + w),
                        C.rpn_stride * (y + h),
                    )
                ] = ROIs[0, ii, :]
                bboxes[cls_name].append(
                    [
                        C.rpn_stride * x,
                        C.rpn_stride * y,
                        C.rpn_stride * (x + w),
                        C.rpn_stride * (y + h),
                    ]
                )
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = model_frcnn.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.2
            )
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                # Calculate real coordinates on original image
                (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(
                    ratio, x1, y1, x2, y2
                )

                features = model_roi_pooling.predict(
                    [
                        F,
                        np.reshape(
                            feature_img_box_mapping[(x1, y1, x2, y2)], (1, 1, 4)
                        ),
                    ]
                )
                features = features.reshape((-1,))

                result[img_path] = result.get(img_path, ([], [], []))
                result[img_path][0].append((real_x1, real_y1, real_x2, real_y2))
                result[img_path][1].append((new_probs[jk], key))
                result[img_path][2].append(features)

        if on_each_iter:
            on_each_iter()
    return result
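
Per the docstring, the return value is keyed by image path; a hedged usage sketch (the pickle file name, image path, and threshold are assumptions):

import pickle

with open("config.pickle", "rb") as f:
    C = pickle.load(f)  # hypothetical Config saved by the project's training script

result = imgs_to_roi_features(["imgs/dog.jpg"], C, bbox_threshold=0.7)
boxes, prob_classes, features = result["imgs/dog.jpg"]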
Example 15
#!/usr/bin/env python

import torch
import torchvision
import matplotlib.pyplot as plt
import PIL
import numpy as np
import cv2
from faster_rcnn import FasterRCNN

if __name__ == "__main__":

    videoCaptureObject = cv2.VideoCapture(2)

    model = FasterRCNN()

    while True:
        cap, frame = videoCaptureObject.read()
        if cap:
            # cv2.imshow('Frame', frame)
            mask = model.person_mask(frame)
            cv2.imshow("Mask", mask)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            videoCaptureObject.release()
            cv2.destroyAllWindows()
            break