def run_model_pb(args, net, init_net, im, check_blobs):
    """Run a protobuf detection net on one image and collect result blobs.

    The workspace is reset, ``init_net`` is run once and ``net`` is created.
    The blobs prepared from ``im`` are fed, the net is run, and the NMS
    outputs (``score_nms``, ``class_nms``, ``bbox_nms``) are fetched. If
    running or fetching raises, the error is printed and empty detections
    are used instead. The results are passed through ``_sort_results`` and
    written back as ``result_boxes`` / ``result_classids``.

    Args:
        args: options object; only ``args.device`` is read here.
        net: the net to create and run.
        init_net: initialization net, run once before ``net`` is created.
        im: input image handed to ``_prepare_blobs``.
        check_blobs: blob names handed to ``_get_result_blobs``.

    Returns:
        Whatever ``_get_result_blobs(check_blobs)`` returns.
    """
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)
    mutils.create_input_blobs_for_net(net.Proto())
    workspace.CreateNet(net)

    prepared = _prepare_blobs(
        im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Only the 'data' blob is fed with the CUDA device option, and only when
    # a GPU run was requested; everything else uses the CPU device option.
    cuda_blob_names = ['data'] if args.device == 'gpu' else []
    for name, value in prepared.items():
        scoped_name = core.ScopedName(name)
        if name in cuda_blob_names:
            device_option = mutils.get_device_option_cuda()
        else:
            device_option = mutils.get_device_option_cpu()
        workspace.FeedBlob(scoped_name, value, device_option)

    try:
        workspace.RunNet(net)
        scores = workspace.FetchBlob('score_nms')
        classids = workspace.FetchBlob('class_nms')
        boxes = workspace.FetchBlob('bbox_nms')
    except Exception as e:
        print('Running pb model failed.\n{}'.format(e))
        # The net may not detect anything at all: fall back to empty results.
        num_dets = 0
        scores = np.zeros((num_dets,), dtype=np.float32)
        boxes = np.zeros((num_dets, 4), dtype=np.float32)
        classids = np.zeros((num_dets,), dtype=np.float32)

    # Append the score as the last column of each box row, then sort the
    # results so they can be compared against another run.
    scored_boxes = np.column_stack((boxes, scores))
    scored_boxes, _, _, classids = _sort_results(
        scored_boxes, None, None, classids)

    # Write the final result back to the workspace.
    workspace.FeedBlob('result_boxes', scored_boxes)
    workspace.FeedBlob('result_classids', classids)

    return _get_result_blobs(check_blobs)
def run_on_image(img_url, net):
    """Debug helper: run ``net`` once on the image at ``img_url``.

    Feeds the prepared image blobs with the CPU device option, runs the net,
    prints every workspace blob name plus the fetched detection outputs, and
    writes the results passed through ``_sort_results`` to ``result_boxes``
    and ``result_classids``.

    Returns:
        Whatever ``_get_result_blobs`` returns for the two result blobs.
    """
    image = get_image(img_url)
    prepared = _prepare_blobs(
        image, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for name, value in prepared.items():
        workspace.FeedBlob(
            core.ScopedName(name), value, mutils.get_device_option_cpu())
    workspace.RunNetOnce(net)

    # Dump the name of every blob currently in the workspace.
    for blob_name in workspace.Blobs():
        print(blob_name)

    # Fetch everything first, then print, so a missing blob fails early.
    scores = workspace.FetchBlob('score_nms')
    classids = workspace.FetchBlob('class_nms')
    boxes = workspace.FetchBlob('bbox_nms')
    cls_prob = workspace.FetchBlob('cls_prob')
    bbox_pred = workspace.FetchBlob('bbox_pred')

    for fetched in (scores, classids, boxes):
        print(fetched)
    print("cls_prob: shape {}".format(cls_prob.shape))
    print(cls_prob)
    print("bbox_pred: shape {}".format(bbox_pred.shape))
    print(bbox_pred)

    # Append the score as the last column of each box row before sorting.
    scored_boxes = np.column_stack((boxes, scores))
    scored_boxes, _, _, classids = _sort_results(
        scored_boxes, None, None, classids)

    workspace.FeedBlob('result_boxes', scored_boxes)
    workspace.FeedBlob('result_classids', classids)

    result_blob_names = ["result_boxes", "result_classids"]
    return _get_result_blobs(result_blob_names)
def run_model_cfg(args, im, check_blobs):
    """Run the config-defined model on ``im`` and collect result blobs.

    The workspace is reset, the model is loaded from ``args`` and
    ``test_engine.im_detect_all`` is run under the GPU 0 scope. The converted
    and sorted results are fed back into the workspace as ``result_boxes``,
    ``result_segms``, ``result_keypoints`` and ``result_classids`` (scoped to
    GPU 0); a ``None`` result is stored as an empty float32 array.

    Returns:
        Whatever ``_get_result_blobs(check_blobs)`` returns.
    """
    workspace.ResetWorkspace()
    model, _ = load_model(args)

    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(
            model, im, None, None)

    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)

    # sort the results based on score for comparison
    boxes, segms, keypoints, classes = _sort_results(
        boxes, segms, keypoints, classes)

    def _as_array(result):
        # A missing result is represented by an empty float32 array.
        if result is None:
            return np.array([], dtype=np.float32)
        return np.array(result)

    # write final results back to workspace
    named_results = (
        ('result_boxes', boxes),
        ('result_segms', segms),
        ('result_keypoints', keypoints),
        ('result_classids', classes),
    )
    with c2_utils.NamedCudaScope(0):
        for blob_name, result in named_results:
            workspace.FeedBlob(core.ScopedName(blob_name), _as_array(result))

    # get result blobs
    with c2_utils.NamedCudaScope(0):
        return _get_result_blobs(check_blobs)
def add_training_inputs(model, roidb=None):
    """Create network input ops and blobs used for training.

    To be called *after* model_builder.create().

    Implementation notes: one would normally create the input ops before the
    rest of the net, but creating them depends on loading the dataset, which
    can take a few minutes for COCO. To let debugging fail fast, the net is
    built *without input ops* first; the input ops are appended here and then
    rotated to the front of the op list.
    """
    assert model.train, 'Training inputs can only be added to a trainable model'

    if roidb is not None:
        # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
        model.roi_data_loader = RoIDataLoader(
            roidb, num_loaders=cfg.DATA_LOADER.NUM_THREADS)

    num_ops_before = len(model.net._net.op)
    blob_names = roi_data.minibatch.get_minibatch_blob_names(is_training=True)

    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
            model.net.DequeueBlobs(
                model.roi_data_loader._blobs_queue_name, blob_names)
            wants_summary = cfg.MODEL.FASTER_RCNN or cfg.MODEL.RC
            if gpu_id == 0 and wants_summary:
                image_blob_name = core.ScopedName('data')
                rois_name = core.ScopedName('rois')
                # model.AddSummaryImage(image_blob_name)
                model.AddSummaryImageBoxes(image_blob_name, rois_name)

    # A little op surgery to move input ops to the start of the net: the ops
    # appended above are the trailing `num_added` entries of the op list.
    ops = model.net._net.op
    num_added = len(ops) - num_ops_before
    reordered = ops[-num_added:] + ops[:-num_added]
    del ops[:]
    ops.extend(reordered)
def get_net(data_loader, name):
    """Build a 'dag' net that dequeues the loader's output blobs per GPU.

    For every GPU in ``cfg.NUM_GPUS`` the loader's output blobs are created
    under the ``gpu_<id>`` name scope and a DequeueBlobs op reading from the
    loader's blobs queue is added. The resulting protobuf is logged.

    Returns:
        The constructed ``core.Net``.
    """
    log = logging.getLogger(__name__)
    output_names = data_loader.get_output_names()

    net = core.Net(name)
    net.type = 'dag'

    for gpu_id in range(cfg.NUM_GPUS):
        scope_name = 'gpu_{}'.format(gpu_id)
        with core.NameScope(scope_name):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for output_name in output_names:
                    workspace.CreateBlob(core.ScopedName(output_name))
                net.DequeueBlobs(data_loader._blobs_queue_name, output_names)

    proto_text = str(net.Proto())
    log.info("Protobuf:\n" + proto_text)
    return net
def feed_all_word_vecs(model):
    """Feed the object and predicate word vectors to every device.

    The vectors are read from ``model.roi_data_loader._landb`` and fed as
    float32 copies into the ``all_obj_word_vecs`` / ``all_prd_word_vecs``
    blobs under each device's scope, for device ids starting at
    ``cfg.ROOT_DEVICE_ID`` and spanning ``cfg.NUM_DEVICES`` devices.
    """
    landb = model.roi_data_loader._landb
    word_vecs = {
        'all_obj_word_vecs': landb.obj_vecs,
        'all_prd_word_vecs': landb.prd_vecs,
    }

    logger.info('feeding all_word_vecs...')
    first_device = cfg.ROOT_DEVICE_ID
    last_device = cfg.ROOT_DEVICE_ID + cfg.NUM_DEVICES
    for gpu_id in range(first_device, last_device):
        logger.info('feeding on GPU {}'.format(gpu_id))
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name, vecs in word_vecs.items():
                # Feed a float32 copy so the source arrays are left untouched.
                as_float32 = vecs.astype(np.float32, copy=True)
                workspace.FeedBlob(core.ScopedName(blob_name), as_float32)
def add_multilevel_pred_box_blob(model, blob_in, pred_boxes_name):
    '''
    Add pred box blobs for multiple FPN levels to the blobs dict.

    Runs ``model.param_init_net`` once, fetches the predicted boxes from the
    workspace, maps them to FPN levels and lets
    ``fpn.add_multilevel_roi_blobs`` update ``blob_in``.

    parameters:
        blob_in: a dict mapping from blob name to numpy ndarray
        pred_boxes_name: 'bbox_pred_stage_1' or 'bbox_pred_stage_2'
    '''
    workspace.RunNetOnce(model.param_init_net)

    min_level = cfg.FPN.RPN_MIN_LEVEL
    max_level = cfg.FPN.RPN_MAX_LEVEL

    boxes_ref = core.BlobReference(pred_boxes_name)
    pred_boxes = workspace.FetchBlob(core.ScopedName(boxes_ref))

    target_levels = fpn.map_rois_to_fpn_levels(
        pred_boxes, min_level, max_level)
    fpn.add_multilevel_roi_blobs(
        blob_in, boxes_ref, pred_boxes, target_levels, min_level, max_level)
def create_threads(self):
    """Create loader processes, per-GPU blob queues and enqueuer threads.

    Populates ``self._workers`` (one ``multiprocessing.Process`` per worker)
    and ``self._enqueuers`` (one ``threading.Thread`` per GPU/enqueuer pair)
    and creates the enqueue blobs in the workspace. Nothing is started here.
    """
    # Mini-batch loader workers, each of which builds mini-batches and places
    # them into a queue in CPU memory.
    self._workers = []
    for _ in range(self._num_workers):
        worker = multiprocessing.Process(
            target=RoIDataLoader.minibatch_loader2,
            args=(self.shared_readonly_dict,
                  self._minibatch_queue,
                  self._lock,
                  self.mp_cur,
                  self.mp_perm,
                  self.coordinator))
        self._workers.append(worker)

    # One BlobsQueue per GPU, each of which feeds a blob in GPU memory to a
    # net.
    for gpu_id in range(self._num_gpus):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            self.create_blobs_queue()

    # An enqueuer thread moves mini-batches from the shared CPU memory queue
    # to a GPU blobs queue. Each GPU has its own pool of enqueuer threads.
    # One blob is created for each
    # (loader output, enqueuer thread, RoIDataLoader instance) triple:
    #   <loader_output>_enqueue_<enqueuer_thread_id>_<loader_id>
    output_names = self.get_output_names()
    enqueue_blob_names = []
    for enqueuer_id in range(self._num_enqueuers):
        enqueue_blob_names.append([
            '{}_enqueue_{}_{}'.format(
                output_name, enqueuer_id, self._loader_id)
            for output_name in output_names
        ])

    for gpu_id in range(self._num_gpus):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(
                    core.DeviceOption(caffe2_pb2.CUDA, gpu_id)):
                for names_for_enqueuer in enqueue_blob_names:
                    for blob_name in names_for_enqueuer:
                        workspace.CreateBlob(core.ScopedName(blob_name))

    # Enqueueing into C2 can't be done by multiple processes, so the
    # enqueuers use the threading module instead.
    self._enqueuers = []
    for gpu_id in range(self._num_gpus):
        for enqueuer_id in range(self._num_enqueuers):
            self._enqueuers.append(threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names[enqueuer_id])))
def add_image_blob(image_blob_name='image'):
    """Make sure the sample image is available as a workspace blob.

    The blob lives under the current name scope. If it is already in the
    workspace, its current content is fetched and returned; otherwise the
    sample image is loaded, preprocessed and fed using the current device
    scope.

    Args:
        image_blob_name: unscoped name of the blob to create or reuse.

    Returns:
        A ``(image_blob_name, image)`` tuple on every path. The original
        returned only the name when the blob already existed, so the two
        paths could not be consumed by the same caller.
    """
    # Check for the scoped name, because that is the blob this function
    # actually creates and feeds.
    scoped_image_blob = core.ScopedName(image_blob_name)
    if scoped_image_blob in workspace.Blobs():
        return image_blob_name, workspace.FetchBlob(scoped_image_blob)

    image = preproc_image(Image.open(sample_image_path))

    # FeedBlob creates the blob when it is missing and accepts
    # device_option=None, so no separate CreateBlob call (which takes only a
    # name) or None/non-None branching is needed.
    device_opt = core.scope.CurrentDeviceScope()
    workspace.FeedBlob(scoped_image_blob, image, device_option=device_opt)

    return image_blob_name, image
def im_detect_mask(model, im_scale, boxes):
    """Infer instance segmentation masks and fetch mask-head features.

    This function must be called after im_detect_bbox as it assumes that the
    Caffe2 workspace is already populated with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        A 6-tuple ``(pred_masks, masks_roi_feat, masks_fcn1, masks_fcn2,
        masks_fcn3, masks_fcn4)``:

        pred_masks (ndarray): R x K x M x M array of class specific soft
            masks output by the network (must be processed by segm_results
            to convert into hard masks in the original image coordinate
            space). When ``boxes`` is empty this is a ``(0, M, M)`` array.
        masks_roi_feat, masks_fcn1..masks_fcn4: contents of the
            ``_[mask]_roi_feat`` and ``_[mask]_fcn1``..``_[mask]_fcn4``
            blobs, or ``None`` when ``boxes`` is empty and the mask net was
            not run.
    """
    M = cfg.MRCNN.RESOLUTION
    if boxes.shape[0] == 0:
        # Keep the return arity identical to the normal path so callers can
        # always unpack six values; there are no features without boxes.
        pred_masks = np.zeros((0, M, M), np.float32)
        return pred_masks, None, None, None, None, None

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.mask_net.Proto().name)

    # Fetch masks
    pred_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_probs')).squeeze()

    # Fetch the intermediate mask-head activations alongside the masks.
    masks_roi_feat = workspace.FetchBlob(core.ScopedName('_[mask]_roi_feat'))
    masks_fcn1 = workspace.FetchBlob(core.ScopedName('_[mask]_fcn1'))
    masks_fcn2 = workspace.FetchBlob(core.ScopedName('_[mask]_fcn2'))
    masks_fcn3 = workspace.FetchBlob(core.ScopedName('_[mask]_fcn3'))
    masks_fcn4 = workspace.FetchBlob(core.ScopedName('_[mask]_fcn4'))

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M])
    else:
        pred_masks = pred_masks.reshape([-1, 1, M, M])

    return (pred_masks, masks_roi_feat, masks_fcn1, masks_fcn2, masks_fcn3,
            masks_fcn4)
def create_model(weights_file):
    """Build a training-mode model fed by a simple RoI data loader.

    Adapted from utils.train.setup_model_for_training. The model is created
    with ``train=True``, its parameters are initialized and then overwritten
    from ``weights_file`` on GPU 0, the input (dequeue) ops are moved to the
    front of the net, and the data loader is started with prefill.

    Returns:
        The constructed model with ``roi_data_loader`` attached and running.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
    loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY)
    model.roi_data_loader = loader

    num_ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    with c2_utils.NamedCudaScope(0):
        for blob_name in blob_names:
            workspace.CreateBlob(core.ScopedName(blob_name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, blob_names)

    # A little op surgery to move input ops to the start of the net: the ops
    # appended above are the trailing `num_added` entries of the op list.
    ops = model.net._net.op
    num_added = len(ops) - num_ops_before
    reordered = ops[-num_added:] + ops[:-num_added]
    del ops[:]
    ops.extend(reordered)

    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)

    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return model
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`.

    The image blob is fed as ``data``. When the ``INT8INFO`` environment
    variable equals ``"1"`` the net is run through an INT8 ``Calibrator``
    instead of ``workspace.RunNet``; the algorithm is chosen by
    ``INT8CALIB`` (``"moving_average"``, ``"kl_divergence"``, anything else
    falls back to absmax). ``INT8KLNUM`` gives the KL iteration count and
    defaults to 100 when unset, empty or zero.

    Returns:
        The image scale returned by ``blob_utils.get_image_blob``.
    """
    im_blob, im_scale, _ = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)

    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # os.environ.get returns None when the variable is unset and
            # int(None) raises TypeError, which made the default of 100
            # unreachable. Fall back before converting.
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.conv_body_net.Proto())
    else:
        workspace.RunNet(model.conv_body_net.Proto().name)

    return im_scale
def main(args):
    """Run the training net repeatedly and dump the requested blobs to disk.

    The config is merged from ``args.cfg`` and forced to a single GPU with
    one image per batch. For each of 1000 iterations the net is run once,
    the blobs named in ``args.blobs`` are fetched under the GPU 0 scope and
    saved to ``<args.output_dir>/<iteration>.npz``.

    Raises:
        AssertionError: if a fetched blob has no ``shape`` attribute.
    """
    logger = logging.getLogger(__name__)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    if args.datasets:
        cfg.TRAIN.DATASETS = tuple(args.datasets)
    assert_and_infer_cfg(cache_urls=False)

    # IMS_PER_BATCH has to be overridden after the config was frozen.
    cfg.immutable(False)
    cfg.TRAIN.IMS_PER_BATCH = 1
    cfg.immutable(True)

    model = create_model(args.weights)

    # Iterate through all images
    # TODO: find a proper way to stop iteration
    for i in range(1000):
        logger.info("Processing {}".format(i + 1))
        with c2_utils.NamedCudaScope(0):
            try:
                workspace.RunNet(model.net.Proto().name)
            except Exception:
                # Best effort, as before: keep going, but no longer hide the
                # failure (and no longer swallow KeyboardInterrupt).
                logger.warning(
                    "RunNet failed on iteration %d", i, exc_info=True)

            # Fetch specified blobs
            blobs = [
                (blob_name, workspace.FetchBlob(core.ScopedName(blob_name)))
                for blob_name in args.blobs
            ]

        invalid_blobs = [
            blob_name for blob_name, value in blobs
            if not hasattr(value, 'shape')
        ]
        assert len(invalid_blobs) == 0, \
            "Blobs not found: {}".format(invalid_blobs)

        # Save blobs. Passing the path lets numpy open the file in binary
        # mode and close it; the original handed over an unclosed text-mode
        # handle.
        save_file = os.path.join(args.output_dir, str(i) + ".npz")
        logger.info("Saving to {}".format(save_file))
        to_save = [value for _, value in blobs]
        np.savez(save_file, *to_save)
def _add_roi_track_head(
    model, add_roi_track_head_func, blob_in, dim_in, spatial_scale_in
):
    """Add a track prediction head to the model.

    Returns:
        ``(loss_gradients, blob_track)``. ``loss_gradients`` comes from
        ``track_rcnn_heads.add_track_losses`` when training and is ``None``
        at inference, where the track ops are split off into
        ``model.track_net`` and ``model.net`` is restored to the bbox-only
        graph.
    """
    # Capture model graph before adding the track head
    bbox_net = copy.deepcopy(model.net.Proto())

    # Add the track head, then the track output
    blob_track_head, dim_track_head = add_roi_track_head_func(
        model, blob_in, dim_in, spatial_scale_in)
    blob_track = track_rcnn_heads.add_track_outputs(
        model, blob_track_head, dim_track_head)

    if model.train:
        loss_gradients = track_rcnn_heads.add_track_losses(model)
        return loss_gradients, blob_track

    # == inference
    # Inference uses a cascade of box predictions, then object association
    # predictions. This requires separate nets for box and track prediction.
    # So we extract the track prediction net, store it as its own network,
    # then restore model.net to be the bbox-only network.
    num_bbox_ops = len(bbox_net.op)
    if cfg.MODEL.SIBLING_BACKBONE_ON and 'track' in cfg.SIBLING.HEADS:
        track_net_temp, _ = c2_utils.SuffixNet(
            'track_net_temp', model.net, num_bbox_ops, blob_track)

        # Blobs listed here are passed to RenameNet as excluded_nodes.
        excluded = []
        for i in xrange(cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1):
            excluded.append(core.ScopedName("track_rois_fpn{}".format(i)))
        excluded.append(core.ScopedName("track_rois_idx_restore_int32"))
        excluded.append(str(blob_track))

        model.track_net, _ = c2_utils.RenameNet(
            "track_net", track_net_temp, cfg.SIBLING.PREFFIX,
            excluded_nodes=excluded)

        # Register every op input whose second-to-last character is "_" as a
        # model parameter.
        sibling_params = []
        for op in model.track_net.Proto().op:
            for input_name in op.input:
                if input_name[-2] == "_":
                    sibling_params.append(core.BlobReference(input_name))
        model.AddParams(sibling_params)
        del track_net_temp
    else:
        model.track_net, _ = c2_utils.SuffixNet(
            'track_net', model.net, num_bbox_ops, blob_track)

    model.net._net = bbox_net
    return None, blob_track
def back_track(model, tracking):
    """Back tracking method for object re-identification.

    Tries to match the still-unassigned new detections of the latest frame
    against lost detections of earlier frames, walking backwards through
    ``tracking.lost_detections_deque``. Matched detections are removed from
    ``tracking.new_detections`` and from the corresponding deque entry; the
    walk stops as soon as no new detection is left.

    Args:
        model: model handed to ``im_detect_track``.
        tracking: tracking state providing ``new_detections``,
            ``lost_detections_deque``, ``extras_deque`` and ``assign``.
    """
    detections = tracking.new_detections
    if not len(detections):
        return

    # reverse list and begin with second last
    lost_detections_list = list(tracking.lost_detections_deque)[-2::-1]
    # reverse list and begin with third last
    extras_list = list(tracking.extras_deque)[-3::-1]
    im_scale, boxes_raw, fpn_res_sum = tracking.extras_deque[-1]

    # Filter out new detections
    boxes = boxes_raw[[det.assign_ind for det in detections]]
    classes = [det.cls for det in detections]
    m_rois = len(detections)

    # Search for matching pairs in previously lost detections
    for i, lost_detections in enumerate(lost_detections_list):
        if not len(lost_detections):
            continue

        # Filter out detections
        im_scale_lost, boxes_lost, fpn_res_sum_lost = extras_list[i]
        boxes_lost = boxes_lost[[det.assign_ind for det in lost_detections]]
        classes_lost = [det.cls for det in lost_detections]
        n_rois = len(lost_detections)

        # Merge fpn_res_sums
        for blob_name, fpn_res_sum_lost_val in fpn_res_sum_lost.items():
            workspace.FeedBlob(
                core.ScopedName(blob_name),
                np.concatenate((fpn_res_sum_lost_val, fpn_res_sum[blob_name])))

        # Compute matches
        with c2_utils.NamedCudaScope(0):
            track = im_detect_track(
                model,
                [im_scale_lost, im_scale],
                [boxes_lost, boxes],
                [fpn_res_sum_lost, fpn_res_sum])

        # Keep a score only where the classes agree and it reaches the
        # detection threshold; every other entry is zeroed.
        track_mat = track.reshape((n_rois, m_rois))
        same_class = np.array(
            [[cls_lost == cls for cls in classes]
             for cls_lost in classes_lost])
        track_mat = np.where(
            np.bitwise_and(
                same_class, track_mat >= cfg.TRCNN.DETECTION_THRESH),
            track_mat,
            np.zeros((n_rois, m_rois)))

        assigned_inds_lost, assigned_inds = tracking.assign(
            lost_detections, detections, track_mat)

        # The original passed the id list as an extra logging argument with
        # no placeholder, which makes %-formatting fail once DEBUG is on.
        logger.debug(
            "Back tracking level %d: %s", i,
            [det.obj_id for j, det in enumerate(detections)
             if j in assigned_inds])

        # Filter out newly assigned detections
        detections = [det for j, det in enumerate(detections)
                      if j not in assigned_inds]
        # Assign back
        tracking.new_detections = detections
        # boxes_raw is indexed by each detection's assign_ind. The original
        # reused list positions here, which selects the wrong boxes as soon
        # as the detection list has been filtered.
        boxes = boxes_raw[[det.assign_ind for det in detections]]
        classes = [det.cls for det in detections]
        m_rois = len(detections)

        # Filter out newly assigned lost detections
        lost_detections = [det for j, det in enumerate(lost_detections)
                           if j not in assigned_inds_lost]
        # Assign back starting from second last
        tracking.lost_detections_deque[-(i + 2)] = lost_detections

        if not len(detections):
            break
def box_results_with_nms_and_limit(scores, boxes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS), together with the ROI features
    of the surviving detections.

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detection, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.

    ROI features are read from the `roi_feat` workspace blob and indexed with
    the same row indices as `scores`.

    Returns:
        (scores, boxes, cls_boxes, cls_feats): the stacked scores and boxes
        of all kept detections, the per-class detection arrays, and the
        per-class ROI features of those detections. Index 0 (background) of
        both per-class lists is an empty list.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    # Fetch ROI features
    roi_features = workspace.FetchBlob(core.ScopedName('roi_feat'))
    # Per-class ROI features, kept in step with cls_boxes
    cls_feats = [[] for _ in range(num_classes)]
    cls_boxes = [[] for _ in range(num_classes)]

    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False)
        # Keep only features whose scores pass cfg.TEST.SCORE_THRESH
        feats_j = roi_features[inds]

        if cfg.TEST.SOFT_NMS.ENABLED:
            # soft_nms also returns the kept indices. The original discarded
            # them, so `keep` below was undefined (or stale from the
            # previous class) whenever soft-NMS was enabled.
            nms_dets, keep = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Apply the same suppression to the features
        cls_feats[j] = feats_j[keep, :]

        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
                cls_feats[j] = cls_feats[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes, cls_feats
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image.

    Args:
        model: model whose ``net`` is run on the image.
        im: input image; ``im.shape`` is used to clip the predicted boxes.
        timers: optional mapping of name -> Timer; the ``im_detect_bbox``
            and ``misc_bbox`` entries are updated.

    Returns:
        cls_boxes: a list with ``cfg.MODEL.NUM_CLASSES`` entries. Entry 0
        (background) is an empty list; every other entry is an (N_c, 5)
        array of ``[x0, y0, x1, y1, score]`` rows, which has zero rows when
        the class has no detections.
    """
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    levels = range(k_min, k_max + 1)
    for lvl, cls_prob, box_pred in zip(levels, cls_probs, box_preds):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # Split the channel axis of the per level outputs into
        # (anchor, class) and (anchor, 4) respectively
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, cls_prob.shape[1] // A,
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. Hence, for the coarsest level, which has small spatial
        # resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    if detections:
        detections = np.vstack(detections)
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    else:
        # np.vstack raises on an empty list; with no detections at all fall
        # through with an empty (0, 6) array so every class gets zero rows.
        detections = np.zeros((0, 6))

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
def load_pretrained_weights(resnet_type, scope=None, verbosity=0):
    """
    Load ResNet weights from a pretrained torchvision model and insert said
    weights into ResNet blobs.

    Ensure that parameters have already been initialized at this point: only
    blobs that already exist in the workspace are overwritten, and their
    shape must match the pretrained weight.

    Args:
        resnet_type: The type of ResNet; must be in ``valid_resnet_types``
        scope: The scope that ResNet blobs reside in, used as a plain string
            prefix. If None, core.ScopedName will be used
        verbosity: Level of logging to use
            0 - Only Errors
            1 - Start and end logged
            2 - Skipped blobs logged

    Raises:
        AssertionError: if ``resnet_type`` is invalid or a workspace blob's
            shape differs from the pretrained weight's shape.
    """
    assert resnet_type in valid_resnet_types, \
        '{} resnet_type is invalid'.format(resnet_type)

    if verbosity >= 1:
        logger.info('Loading pretrained weights for {}'.format(resnet_type))

    # Load a torch model and extract its weights
    torch_model_fn = getattr(torchvision.models, resnet_type)
    torch_model = torch_model_fn(pretrained=True)
    torch_weights = torch_model.state_dict()

    blob_names = set(workspace.Blobs())
    count_added = 0
    total_num_weights = len(torch_weights)

    for weight_name in torch_weights:
        # Ignore running_mean, running_var and num_batches_tracked of
        # batchnorm. Will be consolidated at batchnorm weights and bias
        if 'running_' in weight_name or 'num_batches_tracked' in weight_name:
            continue

        # Process weight name and check that weight exists in workspace
        processed_name = conversion.format_torch_name(weight_name)
        if scope is None:
            processed_name = core.ScopedName(processed_name)
        else:
            processed_name = scope + processed_name

        if processed_name not in blob_names:
            if verbosity >= 2:
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning('Skipping {}'.format(weight_name))
            continue

        # Get pretrained weights from torch model
        # Batchnorm layers will have to be handled differently
        if conversion.is_batchnorm_bias(weight_name, torch_weights):
            pretrained_weight = conversion.get_caffe2_batchnorm_bias(
                weight_name, torch_weights)
        elif conversion.is_batchnorm_weight(weight_name, torch_weights):
            pretrained_weight = conversion.get_caffe2_batchnorm_weight(
                weight_name, torch_weights)
        else:
            # For conv layers we will be transferring weights directly
            pretrained_weight = torch_weights[weight_name].cpu().data.numpy()

        orig_weight = workspace.FetchBlob(processed_name)
        assert orig_weight.shape == pretrained_weight.shape, \
            'Oh no! Something went wrong, torch weights and caffe2 weights are different'
        workspace.FeedBlob(processed_name, pretrained_weight)
        count_added += 1

    # Drop the torch model before releasing cached CUDA memory.
    del torch_weights, torch_model
    torch.cuda.empty_cache()

    if verbosity >= 1:
        logger.info('{}/{} Pretrained weights are loaded'.format(
            count_added, total_num_weights))
def im_detect_mask(model, im_scale, boxes, timers=None):
    """Infer instance segmentation masks.

    This function must be called after im_detect_bbox as it assumes that the
    Caffe2 workspace is already populated with the necessary blobs.

    Environment variables read here: ``EPOCH2OLD == "1"`` runs the mask net
    one extra time before timing as a warm-up; ``INT8INFO == "1"`` runs the
    net through an INT8 ``Calibrator`` whose algorithm is chosen by
    ``INT8CALIB`` (``"moving_average"``, ``"kl_divergence"``, anything else
    falls back to absmax). ``INT8KLNUM`` gives the KL iteration count and
    defaults to 100 when unset, empty or zero.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scale: image blob scale as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)
        timers: optional mapping of name -> Timer; the ``data_mask``,
            ``run_mask`` and ``result_mask`` entries are updated.

    Returns:
        pred_masks (ndarray): R x K x M x M array of class specific soft
            masks output by the network (must be processed by segm_results
            to convert into hard masks in the original image coordinate
            space). When ``boxes`` is empty a ``(0, M, M)`` array is
            returned without running the net.
    """
    if timers is None:
        timers = defaultdict(Timer)

    timers['data_mask'].tic()
    M = cfg.MRCNN.RESOLUTION
    if boxes.shape[0] == 0:
        pred_masks = np.zeros((0, M, M), np.float32)
        return pred_masks

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)}
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    timers['data_mask'].toc()

    # run first time to warm up
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.mask_net.Proto().name)

    timers['run_mask'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            # os.environ.get returns None when the variable is unset and
            # int(None) raises TypeError, which made the default of 100
            # unreachable. Fall back before converting.
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM') or 0)
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.mask_net.Proto())
    else:
        workspace.RunNet(model.mask_net.Proto().name)
    timers['run_mask'].toc()

    timers['result_mask'].tic()
    # Fetch masks
    pred_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_probs')).squeeze()

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M])
    else:
        pred_masks = pred_masks.reshape([-1, 1, M, M])
    timers['result_mask'].toc()

    return pred_masks
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Besides the parameter blobs, this variant also consumes two optional
    entries of the weights file: 'roidb_state' (handed to
    model.roi_data_loader.set_perm_state when a loader exists) and
    'weight_db' (used to build model.class_weight_db when cfg.TRAIN.PADA is
    set). Both entries are removed from the loaded dict afterwards so they are
    not fed into the workspace as preserved blobs.

    Arguments:
        model: model helper whose `params` are to be initialized
        weights_file: path handed to load_object
        gpu_id (int): GPU whose name scope receives the parameter blobs
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    # Snapshot of the blob names already present, used for the shape check
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)
    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            # The lookup above is overridden on purpose: momentum blobs are
            # never loaded by this variant, so the momentum FeedBlob below is
            # currently unreachable.
            has_momentum = False
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(np.float32,
                                                             copy=False))

    # let roidb continue with the data that is not seen yet.
    if 'roidb_state' in src_blobs and model.roi_data_loader is not None:
        model.roi_data_loader.set_perm_state(src_blobs['roidb_state'])
        del src_blobs['roidb_state']
    else:
        logger.info("roidb state not loaded")
    # Drop the state even when there is no loader to receive it, so it is not
    # treated as a preserved blob below.
    if 'roidb_state' in src_blobs and model.roi_data_loader is None:
        del src_blobs['roidb_state']

    if cfg.TRAIN.PADA:
        if 'weight_db' in src_blobs:
            import detectron.modeling.PADA as pada
            model.class_weight_db = pada.ClassWeightDB(*src_blobs['weight_db'])
            del src_blobs['weight_db']
    elif 'weight_db' in src_blobs:
        # PADA disabled: discard the stored class-weight database
        del src_blobs['weight_db']

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob('__preserve__/{:s}'.format(src_name),
                                   src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def feedBlob_run(model, im, entry):
    """Build the test-time input blobs for `im`, feed them and run the net.

    The blobs come from _get_blobs (called without box proposals and without a
    pose model, using cfg.TEST.SCALE / cfg.TEST.MAX_SIZE); each one is fed
    under its scoped name before `model.net` is executed. Nothing is returned.
    """
    blobs, _unused_scale = _get_blobs(
        im,
        None,
        cfg.TEST.SCALE,
        cfg.TEST.MAX_SIZE,
        pose_model=None,
        entry=entry,
    )
    for blob_name, blob_value in blobs.items():
        scoped_name = core.ScopedName(blob_name)
        workspace.FeedBlob(scoped_name, blob_value)
    net_name = model.net.Proto().name
    workspace.RunNet(net_name)
def main():
    """Script entry point: configure Caffe2, build the models and train.

    Steps, in order: initialise Caffe2 and logging, merge the config from the
    command line, log the CUDA/cuDNN environment, build a test-mode model and
    a DataLoader around it, create the training model, optionally load
    weights, then run a hand-written training loop that feeds
    'data_stage2' / 'gt_label_stage2' each iteration, checkpoints
    periodically and saves a final model plus the training statistics.

    NOTE(review): `root`, `output_dir` and `lr_policy` are not defined in this
    function -- presumably module-level names; confirm.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)

    # Test-mode model; it is handed to the DataLoader below
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS, gpu_id=0)
    logger.info("created test model ...")
    train_data = DataLoader(root, "train_id.txt", cfg, test_model, is_train=True)

    # Create the training model
    model, weights_file, start_iter, checkpoints = create_model(
        True, cfg, output_dir)

    # Debug output: list the blobs currently in the workspace
    print(workspace.Blobs())

    # Create the input blobs on every GPU so they exist before CreateNet
    blob_names = ['data_stage2', 'gt_label_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))

    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    writer = SummaryWriter(log_dir=output_dir)
    training_stats = TrainingStats(model, writer)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    logger.info("start train ...")
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # Feed the next batch.
        # NOTE(review): `gpu_id` here is whatever value the blob-creation loop
        # above left behind (the last GPU index), so only that GPU's scope is
        # fed -- confirm this is intended when cfg.NUM_GPUS > 1.
        data_stage2, gt_label = train_data.next_batch()
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
            workspace.FeedBlob(core.ScopedName('gt_label_stage2'), gt_label)

        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats(cur_iter)
        training_stats.LogIterStats(cur_iter, lr)
        writer.add_scalar('learning_rate', lr, cur_iter)

        # Periodic checkpoint (never on the very first iteration)
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Save train loss and metric
    state_file = os.path.join(output_dir, 'training_state.json')
    training_stats.SaveTrainingStates(state_file)
    # Execute the training run
    # NOTE(review): this starts a second training run after the loop above has
    # already trained and saved a final model, and it overwrites `checkpoints`
    # -- looks like a leftover from the stock training script; confirm.
    checkpoints = detectron.utils.train.train_model()
    # Test the trained model
    # NOTE(review): `test_model` was rebound above to the object returned by
    # test_engine.initialize_model_from_cfg, so this call targets that object
    # rather than a module-level test function -- confirm.
    if not args.skip_test:
        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    This variant supports fine-tuning on a different class set: for the six
    output-layer blobs of 'gpu_0' (cls_score, bbox_pred and mask_fcn_logits
    weights and biases) the shape check against the workspace is skipped.
    Instead a freshly initialised array of the workspace shape is built and
    selected rows of the pretrained blob are copied into it; the momentum
    entry for such a blob is reset to zeros of the workspace shape.

    NOTE(review): the row mapping is hard-coded. The class-indexed blobs copy
    source rows 0-3, 6, 8 into rows 0-3, 5, 6 (row 4 keeps its fresh
    initialisation) while the bbox blobs map source classes 6 and 8 onto
    classes 4 and 5 -- confirm that this asymmetry is intended.

    Arguments:
        model: model helper whose `params` are to be initialized
        weights_file (str): path of the pickled weights file
        gpu_id (int): GPU whose name scope receives the parameter blobs

    Raises:
        AssertionError: if a non-remapped workspace blob has a different shape
            than the blob stored in the weights file.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    # Pickle data is binary, so the file must be opened in binary mode.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']

    # Output-layer blobs whose class dimension differs from the pretrained
    # model; these are re-mapped instead of shape-checked.
    GPU_NAME = 'gpu_0'
    remapped_names = (
        GPU_NAME + '/cls_score_w',
        GPU_NAME + '/cls_score_b',
        GPU_NAME + '/bbox_pred_w',
        GPU_NAME + '/bbox_pred_b',
        GPU_NAME + '/mask_fcn_logits_w',
        GPU_NAME + '/mask_fcn_logits_b',
    )
    # Constant used to initialise the re-mapped bias vectors
    bias_init = -np.log((1 - 0.00001) / 0.00001)

    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                ws_blob = workspace.FetchBlob(dst_name)
                if dst_name not in remapped_names:
                    # If the blob is already in the workspace, make sure that
                    # it matches the shape of the loaded blob
                    assert ws_blob.shape == src_blobs[src_name].shape, \
                        ('Workspace blob {} with shape {} does not match '
                         'weights file shape {}').format(
                            src_name, ws_blob.shape,
                            src_blobs[src_name].shape)
                else:
                    if dst_name == GPU_NAME + '/cls_score_w':
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:4, :] = src_blobs[src_name][0:4, :]
                        temp[5, :] = src_blobs[src_name][6, :]
                        temp[6, :] = src_blobs[src_name][8, :]
                        src_blobs[src_name] = temp
                    if dst_name == GPU_NAME + '/cls_score_b':
                        # np.ones takes the shape as one argument; unpacking
                        # it only works for 1-D shapes.
                        temp = bias_init * np.ones(ws_blob.shape)
                        temp[0:4] = src_blobs[src_name][0:4]
                        temp[5] = src_blobs[src_name][6]
                        temp[6] = src_blobs[src_name][8]
                        src_blobs[src_name] = temp
                    if dst_name == GPU_NAME + '/bbox_pred_w':
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:16, :] = src_blobs[src_name][0:16, :]
                        temp[16:20, :] = src_blobs[src_name][24:28, :]
                        temp[20:24, :] = src_blobs[src_name][32:36, :]
                        src_blobs[src_name] = temp
                    if dst_name == GPU_NAME + '/bbox_pred_b':
                        temp = bias_init * np.ones(ws_blob.shape)
                        temp[0:16] = src_blobs[src_name][0:16]
                        temp[16:20] = src_blobs[src_name][24:28]
                        temp[20:24] = src_blobs[src_name][32:36]
                        src_blobs[src_name] = temp
                    if dst_name == GPU_NAME + '/mask_fcn_logits_w':
                        print(src_blobs[src_name].shape)
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:4, :, :, :] = src_blobs[src_name][0:4, :, :, :]
                        temp[5, :, :, :] = src_blobs[src_name][6, :, :, :]
                        temp[6, :, :, :] = src_blobs[src_name][8, :, :, :]
                        src_blobs[src_name] = temp
                    if dst_name == GPU_NAME + '/mask_fcn_logits_b':
                        temp = bias_init * np.ones(ws_blob.shape)
                        temp[0:4] = src_blobs[src_name][0:4]
                        temp[5] = src_blobs[src_name][6]
                        temp[6] = src_blobs[src_name][8]
                        src_blobs[src_name] = temp
                    # The stored momentum has the pretrained shape; restart it
                    # from zero with the shape of the workspace blob.
                    src_blobs[src_name + '_momentum'] = np.zeros(ws_blob.shape)
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob('__preserve__/{:s}'.format(src_name),
                                   src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def _decode_boxes_from_blob(boxes, deltas_blob_name, im_shape):
    """Apply the regression deltas stored in blob `deltas_blob_name` to `boxes`
    and clip the result to an image of shape `im_shape`."""
    box_deltas = workspace.FetchBlob(
        core.ScopedName(deltas_blob_name)).squeeze()
    # In case there is 1 proposal
    box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
    pred_boxes = box_utils.bbox_transform(
        boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
    return box_utils.clip_tiled_boxes(pred_boxes, im_shape)


def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    The network produces one 'rois' blob shared by two heads; the first entry
    of the 'roi_numbers' blob is the index at which the rois are split. Which
    part is decoded is selected by cfg.TEST.WHAT:
      * 'coco': rois before the split, 'cls_prob' / 'bbox_pred'
      * 'toothbrush': rois after the split, 'cls_prob_toothbrush' /
        'bbox_pred_toothbrush'
      * 'toothbrush_rpn': rois after the split, boxes from
        'bbox_pred_toothbrush', scores taken from 'roi_scores' and placed in
        the second of two columns (the first column is all zeros)

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale, target_max_size: forwarded to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scale: image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)

    Raises:
        ValueError: if cfg.TEST.WHAT is not one of the supported modes.
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    rois = workspace.FetchBlob(core.ScopedName('rois'))
    split = workspace.FetchBlob(core.ScopedName('roi_numbers'))

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    what = cfg.TEST.WHAT
    if what == 'coco':
        # `scores` fetched above is already the 'cls_prob' output
        boxes = rois[:int(split[0]), 1:5] / im_scale
        pred_boxes = _decode_boxes_from_blob(boxes, 'bbox_pred', im.shape)
    elif what == 'toothbrush':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        scores = workspace.FetchBlob(
            core.ScopedName('cls_prob_toothbrush')).squeeze()
        scores = scores.reshape([-1, scores.shape[-1]])
        pred_boxes = _decode_boxes_from_blob(
            boxes, 'bbox_pred_toothbrush', im.shape)
    elif what == 'toothbrush_rpn':
        boxes = rois[int(split[0]):, 1:5] / im_scale
        pred_boxes = _decode_boxes_from_blob(
            boxes, 'bbox_pred_toothbrush', im.shape)
        scores = workspace.FetchBlob(core.ScopedName('roi_scores'))
        scores = scores[int(split[0]):]
        # Build an R x 2 score matrix: a zero column followed by the roi
        # scores
        scores = scores[:, np.newaxis]
        tmp = np.zeros(scores.shape, dtype=np.float32)
        scores = np.concatenate((tmp, scores), axis=1)
    else:
        # Previously this fell through to an UnboundLocalError on pred_boxes
        raise ValueError(
            "Unsupported cfg.TEST.WHAT: {!r} (expected 'coco', 'toothbrush' "
            "or 'toothbrush_rpn')".format(what))

    return scores, pred_boxes, im_scale
def im_detect_bbox_batch(model, ims, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for a batch of images.

    All images are run through the network in a single forward pass; the
    resulting rois are then split per image using the batch index stored in
    column 0 of the 'rois' blob.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        ims (list): cfg.TEST.IMS_PER_BATCH color images to test (in BGR order)
        target_scale, target_max_size: forwarded to _get_blobs_batch
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores_batch (list): one array of class scores per image
        pred_boxes_batch (list): one array of predicted boxes per image
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs_batch)
        (None, None, None) is returned when cfg.TEST.BBOX_REG is off.

    Raises:
        NotImplementedError: when box de-duplication is enabled for a
            non-Faster-R-CNN model.
    """
    inputs, im_scales = _get_blobs_batch(ims, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    # NOTE(review): `rois` is only bound on the Faster R-CNN path but is used
    # unconditionally in the per-image loop below -- confirm that other model
    # types are never passed here.
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # (unscaling back to raw image space is done per image below)

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # (the single-proposal reshape is done per image below)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(
            core.ScopedName('bbox_pred')).squeeze()

    scores_batch = []
    pred_boxes_batch = []
    for i in range(len(ims)):
        # select the rois that belong to image i of the batch
        select_inds = np.where(rois[:, 0] == i)

        # unscale back to raw image space
        boxes = rois[select_inds, 1:5] / im_scales[i]
        boxes = boxes.reshape([-1, boxes.shape[-1]])
        scores_i = scores[select_inds, :]
        scores_i = scores_i.reshape([-1, scores_i.shape[-1]])
        scores_batch.append(scores_i)

        if cfg.TEST.BBOX_REG:
            # In case there is 1 proposal
            box_deltas_i = box_deltas[select_inds, :]
            box_deltas_i = box_deltas_i.reshape([-1, box_deltas_i.shape[-1]])
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Remove predictions for bg class (compat with MSRA code)
                box_deltas_i = box_deltas_i[:, -4:]
            pred_boxes = box_utils.bbox_transform(boxes, box_deltas_i,
                                                  cfg.MODEL.BBOX_REG_WEIGHTS)
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, ims[i].shape)
            if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                pred_boxes = (np.tile(pred_boxes, (1, scores_i.shape[1])))
            pred_boxes_batch.append(pred_boxes)
        else:
            # Inference without box regression is not supported in batch mode
            logger.error('Not implemented.')
            return None, None, None

    # NOTE(review): `inv_index` computed above is never used; de-duplicated
    # results are rejected here instead of being mapped back.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        raise NotImplementedError(
            'Deduplication not implemented with batch inference, set TEST.IMS_PER_BATCH to 1'
        )

    return scores_batch, pred_boxes_batch, im_scales
def im_detect_body_uv(model, im_scale, boxes):
    """Compute body uv predictions.

    Feeds `boxes` as 'body_uv_rois', runs `model.body_uv_net` and converts the
    four output heatmaps ('AnnIndex', 'Index_UV', 'U_estimated',
    'V_estimated') into one (3, height, width) array per box: channel 0 holds
    the part index, channels 1 and 2 the U and V values of the selected part.

    Arguments:
        model: detection model providing `body_uv_net`
        im_scale: image scale forwarded to _get_rois_blob
        boxes (ndarray): R x 4 (or wider) array of boxes; columns 0-3 are read
            as x1, y1, x2, y2

    Returns:
        A list with cfg.MODEL.NUM_CLASSES entries; the entry at the person
        class index holds the list of per-box outputs, all others are empty
        lists. When `boxes` is empty an ndarray of shape (0, P, M, M) is
        returned instead.
    """
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE
    P = cfg.BODY_UV_RCNN.NUM_PATCHES

    if boxes.shape[0] == 0:
        pred_body_uvs = np.zeros((0, P, M, M), np.float32)
        return pred_body_uvs

    inputs = {'body_uv_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'body_uv_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.body_uv_net.Proto().name)

    AnnIndex = workspace.FetchBlob(core.ScopedName('AnnIndex')).squeeze()
    Index_UV = workspace.FetchBlob(core.ScopedName('Index_UV')).squeeze()
    U_uv = workspace.FetchBlob(core.ScopedName('U_estimated')).squeeze()
    V_uv = workspace.FetchBlob(core.ScopedName('V_estimated')).squeeze()

    # In case of 1: squeeze() dropped the batch axis, put it back
    if AnnIndex.ndim == 3:
        AnnIndex = np.expand_dims(AnnIndex, axis=0)
    if Index_UV.ndim == 3:
        Index_UV = np.expand_dims(Index_UV, axis=0)
    if U_uv.ndim == 3:
        U_uv = np.expand_dims(U_uv, axis=0)
    if V_uv.ndim == 3:
        V_uv = np.expand_dims(V_uv, axis=0)

    K = cfg.BODY_UV_RCNN.NUM_PATCHES + 1
    outputs = []

    for ind, entry in enumerate(boxes):
        # Compute ref box width and height. cv2.resize requires an integer
        # dsize, so truncate here (the output buffer below was already sized
        # with the truncated values).
        bx = int(max(entry[2] - entry[0], 1))
        by = int(max(entry[3] - entry[1], 1))

        # preds[ind] axes are CHW; bring p axes to WHC
        CurAnnIndex = np.swapaxes(AnnIndex[ind], 0, 2)
        CurIndex_UV = np.swapaxes(Index_UV[ind], 0, 2)
        CurU_uv = np.swapaxes(U_uv[ind], 0, 2)
        CurV_uv = np.swapaxes(V_uv[ind], 0, 2)

        # Resize p from (HEATMAP_SIZE, HEATMAP_SIZE, c) to (bx, by, c)
        CurAnnIndex = cv2.resize(CurAnnIndex, (by, bx))
        CurIndex_UV = cv2.resize(CurIndex_UV, (by, bx))
        CurU_uv = cv2.resize(CurU_uv, (by, bx))
        CurV_uv = cv2.resize(CurV_uv, (by, bx))

        # Bring Cur_Preds axes back to CHW
        CurAnnIndex = np.swapaxes(CurAnnIndex, 0, 2)
        CurIndex_UV = np.swapaxes(CurIndex_UV, 0, 2)
        CurU_uv = np.swapaxes(CurU_uv, 0, 2)
        CurV_uv = np.swapaxes(CurV_uv, 0, 2)

        # Removed squeeze calls due to singleton dimension issues
        CurAnnIndex = np.argmax(CurAnnIndex, axis=0)
        CurIndex_UV = np.argmax(CurIndex_UV, axis=0)
        # Zero the part index wherever the coarse segmentation says background
        CurIndex_UV = CurIndex_UV * (CurAnnIndex > 0).astype(np.float32)

        output = np.zeros([3, by, bx], dtype=np.float32)
        output[0] = CurIndex_UV

        for part_id in range(1, K):
            CurrentU = CurU_uv[part_id]
            CurrentV = CurV_uv[part_id]
            # Pixels assigned to this part take U/V from the part's own map
            part_mask = CurIndex_UV == part_id
            output[1, part_mask] = CurrentU[part_mask]
            output[2, part_mask] = CurrentV[part_mask]
        outputs.append(output)

    num_classes = cfg.MODEL.NUM_CLASSES
    cls_bodys = [[] for _ in range(num_classes)]
    person_idx = keypoint_utils.get_person_class_index()
    cls_bodys[person_idx] = outputs

    return cls_bodys
def initialize_gpu_0_from_weights_file(model, weights_file):
    """Initialize a network with ops on GPU 0.

    Note that we always use GPU 0 and rely on proper usage of
    CUDA_VISIBLE_DEVICES.

    Arguments:
        model: model helper whose `params` are to be initialized
        weights_file (str): path of the pickled weights file

    Raises:
        AssertionError: if a blob that already exists in the workspace has a
            different shape than the blob stored in the weights file.
    """
    logger.info('Loading from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    # Pickle data is binary, so the file must be opened in binary mode.
    # NOTE: unpickling (and the yaml.load below) can execute arbitrary code --
    # only load weights files from trusted sources.
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = yaml.load(src_blobs['cfg'],
                              Loader=yamlloader.ordereddict.CLoader)
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU 0 only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(0):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.info('{:s}{:} loaded from weights file into {:s}: {}'.
                        format(
                            src_name, has_momentum_str,
                            dst_name, src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name,
                        ws_blob.shape,
                        src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def _test_std(self):
    """Run the standard FPN Faster R-CNN model on one image and collect its
    per-level RPN outputs together with the image's ground truth.

    The config file, the weights file and the dataset name are hard-coded
    below. The second roidb entry (index 1) is used as the test image. The
    workspace is reset before returning.

    Returns:
        A 9-tuple (rpn_rois, rpn_roi_probs, gt_boxes, gt_classes, im_info,
        im, im_name, root_dir, dataset), where rpn_rois / rpn_roi_probs are
        the per-level blobs stacked with np.vstack and gt_boxes are the
        non-crowd ground-truth boxes multiplied by the image scale.
    """
    root_dir = osp.join('/private', 'home', 'xinleic', 'pyramid')
    cfg_file = osp.join(root_dir, 'configs', 'visual_genome',
                        'e2e_faster_rcnn_R-50-FPN_1x.yaml')
    merge_cfg_from_file(cfg_file)
    cfg.NUM_GPUS = 1
    cfg.TEST.RPN_PRE_NMS_TOP_N = 12000
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000
    assert_and_infer_cfg()
    test_weight = osp.join(root_dir, 'outputs', 'train',
                           'visual_genome_train',
                           'e2e_faster_rcnn_R-50-FPN_1x', 'RNG_SEED#3',
                           'model_final.pkl')
    model = test_engine.initialize_model_from_cfg(test_weight, gpu_id=0)
    dataset = JsonDataset('visual_genome_val')
    roidb = dataset.get_roidb(gt=True)
    # NOTE(review): num_images and num_classes are not used in this function
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    entry = roidb[1]
    im = cv2.imread(entry['image'])
    max_level = cfg.FPN.RPN_MAX_LEVEL
    min_level = cfg.FPN.RPN_MIN_LEVEL
    # input: rpn_cls_probs_fpn2, rpn_bbox_pred_fpn2
    # output: rpn_rois_fpn2, rpn_roi_probs_fpn2
    with utils.c2.NamedCudaScope(0):
        # let's manually do the testing here
        inputs, im_scale = _get_blobs(im, None, cfg.TEST.SCALE,
                                      cfg.TEST.MAX_SIZE)

        for k, v in inputs.items():
            workspace.FeedBlob(core.ScopedName(k), v)

        workspace.RunNet(model.net.Proto().name)
        # Scoped blob names, one per FPN level from min_level to max_level
        cls_probs = [
            core.ScopedName('rpn_cls_probs_fpn%d' % i)
            for i in range(min_level, max_level + 1)
        ]
        box_preds = [
            core.ScopedName('rpn_bbox_pred_fpn%d' % i)
            for i in range(min_level, max_level + 1)
        ]
        rpn_rois = [
            core.ScopedName('rpn_rois_fpn%d' % i)
            for i in range(min_level, max_level + 1)
        ]
        rpn_roi_probs = [
            core.ScopedName('rpn_roi_probs_fpn%d' % i)
            for i in range(min_level, max_level + 1)
        ]
        # NOTE(review): cls_probs and box_preds are fetched but never used
        # afterwards
        cls_probs = workspace.FetchBlobs(cls_probs)
        box_preds = workspace.FetchBlobs(box_preds)
        rpn_rois = workspace.FetchBlobs(rpn_rois)
        rpn_roi_probs = workspace.FetchBlobs(rpn_roi_probs)

    # Stack the per-level results into single arrays
    rpn_rois = np.vstack(rpn_rois)
    rpn_roi_probs = np.vstack(rpn_roi_probs)
    # # remove the image dimension
    # rpn_rois = rpn_rois[:, 1:]
    # boxes = np.hstack([rpn_rois, rpn_roi_probs])
    im_name = osp.splitext(osp.basename(entry['image']))[0]
    # utils.vis.vis_one_image(im[:, :, ::-1],
    #                         '{:s}-std-output'.format(im_name),
    #                         osp.join(root_dir, 'tests'),
    #                         boxes,
    #                         segms=None,
    #                         keypoints=None,
    #                         thresh=0.,
    #                         box_alpha=0.8,
    #                         dataset=dataset,
    #                         show_class=False)
    # Keep only foreground, non-crowd ground-truth annotations
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    gt_boxes = entry['boxes'][gt_inds, :] * im_scale
    gt_classes = entry['gt_classes'][gt_inds]
    workspace.ResetWorkspace()
    im_info = inputs['im_info'].astype(np.float32)

    return rpn_rois, rpn_roi_probs, gt_boxes, gt_classes, im_info, im, im_name, root_dir, dataset
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals,
    extended with a per-proposal depth prediction.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        target_scale, target_max_size: forwarded to _get_blobs
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        pred_boxes (ndarray): R x 4*K array of predicted bounding boxes
        pred_depths (ndarray): 2-D array read from the 'depth_pred' blob, one
            row per proposal
        im_scale: image scale used in the input blob (as returned by
            _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    # Predict depths. This is read regardless of cfg.TEST.BBOX_REG so that
    # the return statement never sees an unbound `pred_depths`.
    pred_depths = workspace.FetchBlob(core.ScopedName('depth_pred')).squeeze()
    # In case there is 1 proposal
    pred_depths = pred_depths.reshape([-1, pred_depths.shape[-1]])

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes; the
        # depths are per proposal as well and must stay row-aligned.
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        pred_depths = pred_depths[inv_index, :]

    return scores, pred_boxes, pred_depths, im_scale
def im_conv_body_only(model, im):
    """Feed image `im` as the 'data' blob and execute `model.conv_body_net`.

    Returns the scale factors produced by _get_image_blob for `im`.
    """
    blob, scale_factors = _get_image_blob(im)
    data_blob_name = core.ScopedName('data')
    workspace.FeedBlob(data_blob_name, blob)
    net_name = model.conv_body_net.Proto().name
    workspace.RunNet(net_name)
    return scale_factors