def get_model(num_classes, device="cuda:0"):
    # # https://download.pytorch.org/models/vgg16-397923af.pth
    # # To use MobileNetV2 instead, download its pretrained weights, comment out
    # # the three VGG lines below, and uncomment the two MobileNetV2 lines.
    # vgg_feature = vgg(model_name="vgg16", weights_path="./backbone/vgg16.pth").features
    # backbone = torch.nn.Sequential(*list(vgg_feature._modules.values())[:-1])  # drop the final maxpool layer
    # backbone.out_channels = 512

    # # https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
    # # backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
    # # backbone.out_channels = 1280  # channel count of this backbone's output feature map

    # anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                    aspect_ratios=((0.5, 1.0, 2.0),))

    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],  # feature maps to pool from
    #                                                 output_size=[7, 7],   # size of the pooled feature map
    #                                                 sampling_ratio=2)     # sampling ratio

    # model = FasterRCNN(backbone=backbone,
    #                    num_classes=num_classes,
    #                    rpn_anchor_generator=anchor_generator,
    #                    box_roi_pool=roi_pooler)

    # No need for the official weights when building the backbone itself
    backbone = resnet_fpn_backbone('resnet50', pretrained=False, trainable_layers=3)
    model = FasterRCNN(backbone, num_classes=91)  # loading the official weights next, so num_classes must stay 91 here

    # Load the official fasterrcnn_resnet50_fpn checkpoint
    # https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
    weights_dict = torch.load(
        "C:/Users/lixiao/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
        map_location=device)
    missing_keys, unexpected_keys = model.load_state_dict(weights_dict, strict=False)
    overwrite_eps(model, 0.0)
    if len(missing_keys) != 0 or len(unexpected_keys) != 0:
        print("missing_keys: ", missing_keys)
        print("unexpected_keys: ", unexpected_keys)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
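
# A self-contained sketch of the same head-swap technique via torchvision's
# builder API (no local checkpoint needed; num_classes=5 and the input size
# are arbitrary examples, and the `pretrained` flag follows the older
# torchvision API used elsewhere in this snippet).
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=5)

model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])  # list of dicts: boxes, labels, scores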
def main():
    args = arguments.parse_args()
    dataset = VOCDataSet("data", args.dataset, enabled_flip=False)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=5,
                            collate_fn=collate_fn)

    print("[+] Bootstrapping model")
    print("[+] Loading rpn model from %s" % args.rpn)
    print("[+] Loading fast rcnn model from %s" % args.fastrcnn)
    fast_rcnn_checkpoint = torch.load(args.fastrcnn)
    rpn_checkpoint = torch.load(args.rpn)

    fast_rcnn = FasterRCNN().cuda()
    fast_rcnn.load_state_dict(fast_rcnn_checkpoint["state_dict"])
    fast_rcnn.eval()

    rpn = RegionProposalNetwork().cuda()
    rpn.load_state_dict(rpn_checkpoint["state_dict"])
    rpn.eval()  # evaluation mode, matching fast_rcnn above

    print("[+] Calculating Average Precision")
    ap = get_ap(rpn, fast_rcnn, dataset, dataloader,
                name="%s_%s" % (args.dataset, args.name))
    print("Average Precision=%.3f" % ap)
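
# `get_ap` lives elsewhere in the repo; for reference, detection AP rests on
# IoU matching between predicted and ground-truth boxes. A minimal sketch of
# that building block (not the repo's implementation):
def iou(box_a, box_b):
    """Intersection over union of two (x1, y1, x2, y2) boxes."""
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

print(iou((0, 0, 10, 10), (5, 5, 15, 15)))  # 25 / (100 + 100 - 25) ~= 0.143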
class Predict():
    def __init__(self, num_classes):
        self.model_dir = FLAGS.model_path
        self.cnn_net = Vgg16()
        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, num_classes,
                                          batch_size=BATCH_SIZE, is_training=False)
            self.faster_rcnn.build(mode='predict')
        self._initialize()

    def predict(self, image, im_info):
        """Run prediction with a Faster R-CNN network."""
        # allow_soft_placement=True, log_device_placement=True
        return self._predict(image, im_info)

    def _initialize(self):
        tf.set_random_seed(1234)
        random.seed(1234)
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tfconfig)
        self.saver = tf.train.Saver(max_to_keep=100000)
        try:
            checkpoint_dir = self.model_dir
            print("Trying to restore last checkpoint ...:", checkpoint_dir)
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
            self.saver.restore(self.sess, save_path=last_chk_path)
            print("restore last checkpoint %s done" % checkpoint_dir)
        except Exception as e:
            print(e)
            raise e

    def _predict(self, image, im_info):
        return self.faster_rcnn.predict(self.sess, image, im_info)
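
# A hypothetical usage sketch for the Predict wrapper above, assuming
# FLAGS.model_path points at a trained checkpoint; the blob layout follows the
# usual tf-faster-rcnn convention and is an assumption here, as is the
# 21-class count (20 VOC classes + background).
import numpy as np

image = np.random.rand(1, 600, 800, 3).astype(np.float32)   # preprocessed image blob
im_info = np.array([[600, 800, 1.0]], dtype=np.float32)     # (height, width, scale)
predictor = Predict(num_classes=21)
detections = predictor.predict(image, im_info)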
def train():
    """ train """
    print('execute train')

    # TODO
    train_inputs = None
    train_teachers = None
    test_inputs = None
    test_teachers = None

    anchors = get_default_anchors()
    train_taegets = [TrainTarget.BACKBONE, TrainTarget.RPN, TrainTarget.HEAD]
    network = FasterRCNN(INPUT_SHAPE, 2, anchors
                         # , train_taegets=None, is_predict=True
                         , train_taegets=train_taegets, is_predict=False)
    model = network.get_model_with_default_compile()

    network.draw_model_summary(
        file_name=os.path.join(os.pardir, 'ModelLayers.png'))
def __init__(self, nhidden, n_object_cats, n_predicate_cats, MPS_iter,
             object_loss_weight, predicate_loss_weight, dropout=False,
             use_kmeans_anchors=True, base_model='vgg'):
    super(Hierarchical_Descriptive_Model, self).__init__(
        nhidden, n_object_cats, n_predicate_cats, MPS_iter,
        object_loss_weight, predicate_loss_weight, dropout)

    self.dropout = dropout
    # self.rpn = RPN(use_kmeans_anchors)
    self.rcnn = FasterRCNN(nhidden, use_kmeans_anchors, n_object_cats, model=base_model)
    # self.roi_pool_object = RoIPool(7, 7, 1.0/16)
    self.roi_pool_phrase = RoIAlign(7, 7, 1.0/16)

    if base_model == 'vgg':
        # self.fc6 = FC(512*7*7, nhidden)
        self.fc6_phrase = FC(512*7*7, nhidden, relu=True)
    elif base_model == 'resnet50' or base_model == 'resnet101':
        # self.fc6 = FC(1024*7*7, nhidden)
        self.fc6_phrase = FC(1024*7*7, nhidden, relu=True)
    else:
        print('please choose a model')

    # self.fc7 = FC(nhidden, nhidden, relu=True)
    self.fc7_phrase = FC(nhidden, nhidden, relu=True)
    self.spacial_conv = SpacialConv(pooling_size=32)

    if MPS_iter == 0:
        self.mps = None
    else:
        # the hierarchical message passing structure
        self.mps = Hierarchical_Message_Passing_Structure(nhidden, n_object_cats, n_predicate_cats)
        network.weights_normal_init(self.mps, 0.01)

    # self.score_fc = FC(nhidden, self.n_classes_obj, relu=False)
    # self.bbox_fc = FC(nhidden, self.n_classes_obj * 4, relu=False)
    self.score_fc_pred = FC(nhidden + 64, self.n_classes_pred, relu=False)
    # self.bbox_pred_fc = FC(nhidden, self.n_classes_pred * 4, relu=False)

    # network.weights_normal_init(self.score_fc, 0.01)
    # network.weights_normal_init(self.bbox_fc, 0.005)
    network.weights_normal_init(self.score_fc_pred, 0.01)
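
# RoIAlign(7, 7, 1.0/16) pools each phrase box into a fixed 7x7 grid at the
# backbone's 1/16 resolution. A self-contained sketch of the same operation
# using torchvision's functional op (all dimensions illustrative):
import torch
from torchvision.ops import roi_align

fm = torch.rand(1, 512, 38, 50)  # VGG conv5-style feature map, stride 16
boxes = torch.tensor([[0., 32., 32., 256., 192.]])  # (batch_idx, x1, y1, x2, y2) in image pixels
pooled = roi_align(fm, boxes, output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
print(pooled.shape)  # torch.Size([1, 512, 7, 7]) -> flattened into fc6_phrase (512*7*7 inputs)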
def main():
    global args
    args = arguments.parse_args()
    experiment_env = create_experiment_dir()
    assert args.rois_per_batch % args.batch_size == 0, "Uneven number of rois per image"
    rois_per_image = args.rois_per_batch // args.batch_size  # exact by the assert above
    train_data = VOCDataSetROIs("data", "train", rois_per_image, enabled_flip=True)
    dataloader = DataLoader(train_data,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=5,
                            collate_fn=collate_rois_fn)

    ################### MODEL BOOTSTRAP #####################
    print("[+] Bootstrapping model")
    if args.stage_2_path is not None:
        print("[+] Loading stage 2 weights")
    net = FasterRCNN(args.stage_2_path).cuda()
    net.train()

    if args.resume is not None:
        print("[+] Resuming from %s" % args.resume)
        checkpoint = torch.load(args.resume)
        net.load_state_dict(checkpoint["state_dict"])

    cross_entropy = CrossEntropyLoss(size_average=True).cuda()
    smooth_l1_loss = SmoothL1Loss(size_average=False).cuda()
    optimizer = opt.SGD(
        [params for params in net.parameters() if params.requires_grad],
        lr=args.lr, momentum=args.momentum, weight_decay=0.0005)

    ################### MODEL TRAINING #####################
    print("[+] Training model")
    start_epoch = 0 if args.resume is None else checkpoint["epoch"]
    for epoch in range(start_epoch, args.epoch):
        adjust_learning_rate(optimizer, epoch)
        train(net, cross_entropy, smooth_l1_loss, optimizer, dataloader,
              experiment_env, epoch)
import numpy as np
import chainer
from unittest import mock

from faster_rcnn import FasterRCNN

if __name__ == '__main__':
    test_data = VOCDetectionDataset(mode='train', use_cache=True,
                                    year='2007', bgr=True)
    wrappers = [
        lambda d: SubtractWrapper(
            d, value=np.array([103.939, 116.779, 123.68])),
        lambda d: ResizeWrapper(
            d, preprocess_idx=0,
            output_shape=output_shape_soft_min_hard_max(600, 1200),
            hook=bbox_resize_hook(1)),
    ]
    for wrapper in wrappers:
        test_data = wrapper(test_data)

    model = FasterRCNN()
    chainer.serializers.load_npz('VGG16_faster_rcnn_final.model', model)

    trainer = mock.MagicMock()
    trainer.out = 'result'
    trainer.updater.iteration = 0
    extension = DetectionVisReport([3, 4, 5, 6, 7, 8], test_data, model,
                                   predict_func=model.predict_bboxes)
    extension(trainer)
# Presumed RPN head (class header reconstructed for this fragment): the two
# conv layers output 18 = 2x9 objectness scores and 36 = 4x9 box deltas per
# spatial location.
class RPNClassifier(nn.Module):
    def __init__(self, n):
        super(RPNClassifier, self).__init__()
        self.m1 = nn.Conv2d(n, 18, 3, 1, 1)
        self.m2 = nn.Conv2d(n, 36, 3, 1, 1)
        # normal_init(self.conv1, 0, 0.01)
        normal_init(self.m1, 0, 0.01)
        normal_init(self.m2, 0, 0.01)

    def forward(self, x):
        # x = F.relu(self.conv1(x))
        return self.m1(x), self.m2(x)


def normal_init(m, mean, stddev, truncated=False):
    """Weight initializer: truncated normal and random normal."""
    # m is a module whose weight/bias parameters get initialized in place
    if truncated:
        m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean)  # not a perfect approximation
    else:
        m.weight.data.normal_(mean, stddev)
    m.bias.data.zero_()


FasterRCNN_model = FasterRCNN(
    features=Feature_extractor(sqz_feature_extractor),
    pooler=pooler,
    classifier=Classifier(),
    rpn=RPN(classifier=RPNClassifier(512)))
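
# Quick shape check for the head above (assumes the reconstructed
# RPNClassifier class; feature-map size is illustrative):
import torch

head = RPNClassifier(512)
fm = torch.rand(1, 512, 38, 50)  # dummy backbone feature map
cls_logits, bbox_deltas = head(fm)
print(cls_logits.shape)   # torch.Size([1, 18, 38, 50]): 2 scores x 9 anchors
print(bbox_deltas.shape)  # torch.Size([1, 36, 38, 50]): 4 deltas x 9 anchors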
class Solver():
    def __init__(self, imdb, roidb, val_imdb, val_roidb, pretrain_model):
        self.imdb = imdb
        self.roidb = roidb
        self.val_imdb = val_imdb
        self.val_roidb = val_roidb
        self.pretrain_model = pretrain_model
        self.model_dir = FLAGS.model_path
        self.log_dir = FLAGS.log_path
        self.val_log_dir = FLAGS.val_log_path
        self.cnn_net = Vgg16()
        with tf.device("/gpu:0"):
            self.faster_rcnn = FasterRCNN(self.cnn_net, self.imdb.num_classes,
                                          batch_size=config.BATCH_SIZE, is_training=True)
            self.faster_rcnn.build(mode='train')
        variables = tf.global_variables()
        print("all var:", variables)

    def train_net(self, max_iters=700000):
        """Train a Faster R-CNN network."""
        roidb = filter_roidb(self.roidb)
        # allow_soft_placement=True, log_device_placement=True
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            self.initialize(sess, self.pretrain_model)
            self.train_model(sess, max_iters)

    def get_variables_in_checkpoint_file(self, file_name):
        from tensorflow.python import pywrap_tensorflow
        try:
            reader = pywrap_tensorflow.NewCheckpointReader(file_name)
            var_to_shape_map = reader.get_variable_to_shape_map()
            return var_to_shape_map
        except Exception as e:  # pylint: disable=broad-except
            print(str(e))
            if "corrupted compressed block contents" in str(e):
                print("It's likely that your checkpoint file has been compressed "
                      "with SNAPPY.")

    def initialize(self, sess, pretrained_model):
        tf.set_random_seed(1234)
        random.seed(1234)
        self.saver = tf.train.Saver(max_to_keep=100000)
        try:
            checkpoint_dir = self.model_dir
            print("Trying to restore last checkpoint ...:", checkpoint_dir)
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
            self.saver.restore(sess, save_path=last_chk_path)
            print("restore last checkpoint %s done" % checkpoint_dir)
        except Exception as e:
            print("Failed to restore checkpoint. Initializing variables instead.", e)
            # Initial file lists are empty
            # Fresh train directly from ImageNet weights
            print('Loading initial model weights from {:s}'.format(pretrained_model))
            variables = tf.global_variables()
            # Initialize all variables first
            sess.run(tf.variables_initializer(variables, name='init'))
            var_keep_dic = self.get_variables_in_checkpoint_file(pretrained_model)
            variables_to_restore = self.cnn_net.get_variables_to_restore(variables, var_keep_dic)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, pretrained_model)
            self.cnn_net.fix_variables(sess, pretrained_model)
        self.writer = tf.summary.FileWriter(self.log_dir, sess.graph)
        self.val_writer = tf.summary.FileWriter(self.val_log_dir, sess.graph)

    def save_model(self, sess, global_step):
        self.saver.save(sess, os.path.join(self.model_dir, 'cp'), global_step=global_step)
        print("save model:", os.path.join(self.model_dir, 'cp'))

    def train_model(self, sess, max_iters):
        # print("train:", self.roidb)
        # Build data layers for both training and validation set
        self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
        self.val_data_layer = RoIDataLayer(self.val_roidb, self.val_imdb.num_classes)

        iter = 0
        rate = config.LEARNING_RATE
        next_step = [50000]
        global_step = sess.run(self.faster_rcnn.global_op)
        print("start global step:", global_step)
        # Make sure the lists are not empty
        while iter < max_iters + 1:
            if global_step == 0:
                self.faster_rcnn.assign_lr(sess, rate)
            if len(next_step) > 0 and global_step == next_step[0]:
                rate *= config.GAMMA
                self.faster_rcnn.assign_lr(sess, rate)
                next_step = next_step[1:]
                print("next step:", next_step)

            blobs = self.data_layer.forward()
            image = blobs['data']
            gt_boxes = blobs['gt_boxes']
            im_info = blobs['im_info']

            start_time = time.time()
            loss, lr, global_step, summary_str = self.faster_rcnn.train_step(sess, image, gt_boxes, im_info)
            iter += 1
            diff = time.time() - start_time
            print("===== loss:", loss, "lr:", lr, "global step:", global_step, "time:", diff, "step:", iter)

            if iter % 100 == 0:
                self.writer.add_summary(summary_str, global_step)
                summary = tf.Summary()
                summary.value.add(tag='loss', simple_value=loss)
                summary.value.add(tag='lr', simple_value=lr)
                self.writer.add_summary(summary, global_step)

            if iter % config.SAVE_STEP == 0:
                self.save_model(sess, global_step)
                val_blobs = self.val_data_layer.forward()
                # print("val_blobs['data']", val_blobs['data'], val_blobs['gt_boxes'])
                # print(val_blobs['gt_boxes'])
                # print(val_blobs['im_info'])
                val_loss = self.faster_rcnn.get_loss(sess, val_blobs['data'], val_blobs['gt_boxes'], val_blobs['im_info'])
                print("val loss:", val_loss)
                summary = tf.Summary()
                summary.value.add(tag='loss', simple_value=val_loss)
                self.val_writer.add_summary(summary, global_step)
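
# The loop above implements a plain step decay: the rate is multiplied by
# config.GAMMA each time global_step crosses a boundary in next_step. A sketch
# with illustrative values (LEARNING_RATE=1e-3, GAMMA=0.1):
def lr_at(step, base_lr=1e-3, gamma=0.1, boundaries=(50000,)):
    lr = base_lr
    for b in boundaries:
        if step >= b:
            lr *= gamma
    return lr

print(lr_at(0), lr_at(49999), lr_at(50000))  # 0.001 0.001 0.0001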
def main(gpu=-1, epoch=100, batch_size=1, lr=5e-4, out='result'):
    train_data = VOCDetectionDataset(mode='train', use_cache=True, year='2007')
    test_data = VOCDetectionDataset(mode='val', use_cache=True, year='2007')

    def transform(in_data):
        img, bbox = in_data
        img -= np.array([103.939, 116.779, 123.68])[:, None, None]
        # Resize so that the smaller edge is at least 600 px
        # (and the larger edge at most 1200 px)
        input_shape = img.shape[1:]
        output_shape = _shape_soft_min_hard_max(input_shape, 600, 1200)
        img = transforms.resize(img, output_shape)
        bbox = transforms.bbox_resize(bbox, input_shape, output_shape)
        # horizontal flip
        img, flips = transforms.random_flip(img, horizontal_flip=True, return_flip=True)
        h_flip = flips['h']
        bbox = transforms.bbox_flip(bbox, output_shape, h_flip)
        return img, bbox

    transforms.extend(train_data, transform)
    transforms.extend(test_data, transform)

    model = FasterRCNN(gpu=gpu)
    if gpu != -1:
        model.to_gpu(gpu)
        chainer.cuda.get_device(gpu).use()

    # optimizer = chainer.optimizers.MomentumSGD(lr=lr)
    optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # iterate over the training set (not test_data) with the given batch size
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=batch_size)
    updater = ParallelUpdater(train_iter, optimizer, devices={'main': gpu})
    # updater = chainer.training.updater.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    log_interval = 20, 'iteration'
    val_interval = 3000, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'main/time',
        'main/rpn_loss_cls',
        'main/rpn_loss_bbox',
        'main/loss_cls',
        'main/loss_bbox'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # visualize training
    trainer.extend(extensions.PlotReport(['main/rpn_loss_cls'],
                                         file_name='rpn_loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/rpn_loss_bbox'],
                                         file_name='rpn_loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_cls'],
                                         file_name='loss_cls.png'),
                   trigger=log_interval)
    trainer.extend(extensions.PlotReport(['main/loss_bbox'],
                                         file_name='loss_bbox.png'),
                   trigger=log_interval)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 samples of train_data
            train_data,
            model,
            filename_base='detection_train',
            predict_func=model.predict_bboxes),
        trigger=val_interval, invoke_before_training=True)
    trainer.extend(
        DetectionVisReport(
            range(10),  # visualize outputs for the first 10 samples of test_data
            test_data,
            model,
            predict_func=model.predict_bboxes),
        trigger=val_interval, invoke_before_training=True)
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def imgs_to_roi_features(imgs_paths, C, bbox_threshold, on_each_iter=None, train=False):
    """Given a set of image paths, transforms each image into the RoI-pooled
    features of the most confident objects in it.

    Arguments:
        imgs_paths {list(file_paths)} -- List of the file paths where the imgs are found
        C {Config} -- Configuration object taken from pickle

    Returns:
        {
            '<img_path>': (
                list((x1, y1, x2, y2)),
                list((prob, class)),
                list(feature (7x7x512))
            )
        }
    """
    if not train:
        # turn off any data augmentation
        C.use_horizontal_flips = False
        C.use_vertical_flips = False
        C.rot_90 = False

    model_frcnn = FasterRCNN()
    num_features = 512

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (VGG here, can be Resnet50, Inception, etc)
    shared_layers = model_frcnn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = model_frcnn.rpn_layer(shared_layers, num_anchors)
    classifier = model_frcnn.classifier_layer(
        feature_map_input, roi_input, C.num_rois, nb_classes=len(C.class_mapping)
    )

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    feature_extraction_input = Input(shape=(1, 4))
    roi_pooling = model_frcnn.roi_pooling_layer(
        feature_map_input, feature_extraction_input, 1, nb_classes=len(C.class_mapping)
    )
    model_roi_pooling = Model(
        [feature_map_input, feature_extraction_input], roi_pooling
    )

    try:
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)
    except Exception:
        # When calling this function from the server, given that it is
        # multithreaded, an exception is raised since the model's weights
        # were already loaded. A better approach would be to create the
        # model only once.
        pass

    model_rpn.compile(optimizer="sgd", loss="mse")
    model_classifier.compile(optimizer="sgd", loss="mse")

    # Switch key and value in the class mapping
    class_mapping = C.class_mapping
    class_mapping = {v: k for k, v in class_mapping.items()}

    result = {}

    for img_path in imgs_paths:
        img = cv2.imread(img_path)
        X, ratio = format_img(img, C)
        X = np.transpose(X, (0, 2, 3, 1))

        # get output layers Y1, Y2 from the RPN and the feature maps F
        # Y1: y_rpn_cls
        # Y2: y_rpn_regr
        [Y1, Y2, F] = model_rpn.predict(X)

        # Get bboxes by applying NMS
        # R.shape = (300, 4)
        R = model_frcnn.rpn_to_roi(
            Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7
        )

        # convert from (x1, y1, x2, y2) to (x, y, w, h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        feature_img_box_mapping = {}
        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk : C.num_rois * (jk + 1), :], axis=0)
            if ROIs.shape[1] == 0:
                break
            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, : curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1] :, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            # Calculate bbox coordinates on the resized image
            for ii in range(P_cls.shape[1]):
                # Ignore boxes below the threshold and the 'bg' class
                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]
                ) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]
                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num : 4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = model_frcnn.apply_regr(x, y, w, h, tx, ty, tw, th)
                except Exception:
                    pass

                feature_img_box_mapping[
                    (
                        C.rpn_stride * x,
                        C.rpn_stride * y,
                        C.rpn_stride * (x + w),
                        C.rpn_stride * (y + h),
                    )
                ] = ROIs[0, ii, :]
                bboxes[cls_name].append(
                    [
                        C.rpn_stride * x,
                        C.rpn_stride * y,
                        C.rpn_stride * (x + w),
                        C.rpn_stride * (y + h),
                    ]
                )
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = model_frcnn.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.2
            )
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                # Calculate real coordinates on the original image
                (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(
                    ratio, x1, y1, x2, y2
                )
                features = model_roi_pooling.predict(
                    [
                        F,
                        np.reshape(
                            feature_img_box_mapping[(x1, y1, x2, y2)], (1, 1, 4)
                        ),
                    ]
                )
                features = features.reshape((-1,))
                result[img_path] = result.get(img_path, ([], [], []))
                result[img_path][0].append((real_x1, real_y1, real_x2, real_y2))
                result[img_path][1].append((new_probs[jk], key))
                result[img_path][2].append(features)

        if on_each_iter:
            on_each_iter()

    return result
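
# Sketch of the standard Faster R-CNN box-regression update that
# model_frcnn.apply_regr is expected to perform (the parameterization from the
# original paper; not necessarily this repo's exact code):
import math

def apply_regr(x, y, w, h, tx, ty, tw, th):
    cx, cy = x + w / 2.0, y + h / 2.0            # box centre
    cx1, cy1 = tx * w + cx, ty * h + cy          # shifted centre
    w1, h1 = math.exp(tw) * w, math.exp(th) * h  # rescaled width/height
    return cx1 - w1 / 2.0, cy1 - h1 / 2.0, w1, h1

print(apply_regr(10, 10, 20, 20, 0.1, 0.0, 0.2, 0.0))
# (9.786..., 10.0, 24.428..., 20.0): centre shifted by tx*w, width scaled by e**tw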
#!/usr/bin/env python
import torch
import torchvision
import matplotlib.pyplot as plt
import PIL
import numpy as np
import cv2

from faster_rcnn import FasterRCNN

if __name__ == "__main__":
    videoCaptureObject = cv2.VideoCapture(2)
    model = FasterRCNN()
    while True:
        cap, frame = videoCaptureObject.read()
        if cap:
            # cv2.imshow('Frame', frame)
            mask = model.person_mask(frame)
            cv2.imshow("Mask", mask)
            # bitwise AND (&), not logical `and`, to mask the key code
            if cv2.waitKey(1) & 0xFF == ord('q'):
                videoCaptureObject.release()
                cv2.destroyAllWindows()
                break