Exemple #1
0
 def __init__(
         self,
         feature_extractor: Im2VecEncoder,
         pooler_resolution: int = 7,
         pooler_sampling_ratio: int = 2,
         decoder_thresh: float = 0.1,
         decoder_nms_thresh: float = 0.5,
         decoder_detections_per_image: int = 100,
         matcher_high_thresh: float = 0.5,
         matcher_low_thresh: float = 0.5,
         allow_low_quality_matches: bool = True,
         batch_size_per_image: int = 256,
         balance_sampling_fraction: float = 0.25):
     """Set up the RoI pooler, box coder, proposal matcher and sampler.

     Args:
         feature_extractor: Stored as ``self.feature_extractor``.
         pooler_resolution: ``output_size`` given to ``MultiScaleRoIAlign``.
         pooler_sampling_ratio: ``sampling_ratio`` given to
             ``MultiScaleRoIAlign``.
         decoder_thresh: Stored as ``self.decoder_thresh``.
         decoder_nms_thresh: Stored as ``self.decoder_nms_thresh``.
         decoder_detections_per_image: Stored as
             ``self.decoder_detections_per_image``.
         matcher_high_thresh: ``high_threshold`` of the proposal matcher.
         matcher_low_thresh: ``low_threshold`` of the proposal matcher.
         allow_low_quality_matches: Forwarded to the proposal matcher.
         batch_size_per_image: First argument of the balanced sampler.
         balance_sampling_fraction: ``positive_fraction`` of the sampler.
     """
     super(FasterRCNNROIHead, self).__init__()

     # The pooler is configured for the feature maps keyed 0 through 3.
     pooler = MultiScaleRoIAlign(
         featmap_names=[0, 1, 2, 3],
         output_size=pooler_resolution,
         sampling_ratio=pooler_sampling_ratio)
     self.roi_pool = pooler
     self.feature_extractor = feature_extractor

     coder_weights = (10., 10., 5., 5.)
     self.box_coder = det_utils.BoxCoder(weights=coder_weights)

     # Settings kept for the decoding step.
     self.decoder_thresh = decoder_thresh
     self.decoder_nms_thresh = decoder_nms_thresh
     self.decoder_detections_per_image = decoder_detections_per_image

     matcher = det_utils.Matcher(
         high_threshold=matcher_high_thresh,
         low_threshold=matcher_low_thresh,
         allow_low_quality_matches=allow_low_quality_matches)
     self.proposal_matcher = matcher

     self.sampler = det_utils.BalancedPositiveNegativeSampler(
         batch_size_per_image,
         positive_fraction=balance_sampling_fraction)
Exemple #2
0
    def __init__(self, anchor_generator, head,
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 pre_nms_top_n, post_nms_top_n, nms_thresh):
        """Store the RPN components and build its matcher, sampler and coder.

        Args:
            anchor_generator: Stored as ``self.anchor_generator``.
            head: Stored as ``self.head``.
            fg_iou_thresh: First positional argument of ``det_utils.Matcher``.
            bg_iou_thresh: Second positional argument of ``det_utils.Matcher``.
            batch_size_per_image: First argument of the fg/bg sampler.
            positive_fraction: Second argument of the fg/bg sampler.
            pre_nms_top_n: Stored as ``self._pre_nms_top_n``.
            post_nms_top_n: Stored as ``self._post_nms_top_n``.
            nms_thresh: Stored as ``self.nms_thresh``.
        """
        super(RegionProposalNetwork, self).__init__()

        self.anchor_generator = anchor_generator
        self.head = head

        unit_weights = (1.0, 1.0, 1.0, 1.0)
        self.box_coder = det_utils.BoxCoder(weights=unit_weights)

        # --- training-time helpers ---
        self.box_similarity = box_ops.box_iou
        matcher = det_utils.Matcher(
            fg_iou_thresh,
            bg_iou_thresh,
            allow_low_quality_matches=False,
        )
        self.proposal_matcher = matcher
        sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = sampler

        # --- test-time settings ---
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.min_size = 1e-3
Exemple #3
0
    def __init__(self, in_channels, num_anchors):
        """Build a four-layer conv tower followed by a box-regression conv.

        Args:
            in_channels: Input and output channel count of every tower conv,
                and input channel count of the regression conv.
            num_anchors: The regression conv emits ``num_anchors * 4``
                channels.
        """
        super().__init__()

        # Four (3x3 conv -> ReLU) pairs, channel count unchanged throughout.
        tower = []
        for _ in range(4):
            tower += [
                nn.Conv2d(in_channels, in_channels,
                          kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
            ]
        self.conv = nn.Sequential(*tower)

        self.bbox_reg = nn.Conv2d(
            in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)
        # The regression conv is re-initialised first, then the tower convs;
        # this order is kept so random-number consumption stays the same.
        torch.nn.init.normal_(self.bbox_reg.weight, std=0.01)
        torch.nn.init.zeros_(self.bbox_reg.bias)

        for module in self.conv.children():
            if not isinstance(module, nn.Conv2d):
                continue
            torch.nn.init.normal_(module.weight, std=0.01)
            torch.nn.init.zeros_(module.bias)

        unit_weights = (1.0, 1.0, 1.0, 1.0)
        self.box_coder = det_utils.BoxCoder(weights=unit_weights)
Exemple #4
0
	def __init__(
			self,
			RelDN,
			box_roi_pool,
			box_head,
			box_predictor,
			# Faster R-CNN training
			fg_iou_thresh,
			bg_iou_thresh,
			batch_size_per_image,
			positive_fraction,
			bbox_reg_weights,
			# Faster R-CNN inference
			score_thresh,
			nms_thresh,
			detections_per_img,
			# Mask
			mask_roi_pool=None,
			mask_head=None,
			mask_predictor=None,
			keypoint_roi_pool=None,
			keypoint_head=None,
			keypoint_predictor=None,
	):
		"""Store the RoI-head components and build matcher, samplers and coder.

		Two samplers are created: ``fg_bg_sampler`` from the caller-supplied
		``batch_size_per_image`` / ``positive_fraction``, and
		``fg_bg_sampler_so`` from the fixed values 64 and 0.5.

		``bbox_reg_weights`` falls back to ``(10., 10., 5., 5.)`` when it is
		``None``. Every other argument is stored under the attribute of the
		same name.
		"""
		super(RoIHeads, self).__init__()

		self.box_similarity = box_ops.box_iou
		self.RelDN = RelDN

		# Matcher that assigns ground-truth boxes to each proposal.
		matcher = det_utils.Matcher(
			fg_iou_thresh,
			bg_iou_thresh,
			allow_low_quality_matches=False)
		self.proposal_matcher = matcher

		self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
			batch_size_per_image, positive_fraction)

		# Second sampler with fixed settings.
		so_batch_size, so_positive_fraction = 64, 0.5
		self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(
			so_batch_size, so_positive_fraction)

		coder_weights = (
			(10., 10., 5., 5.) if bbox_reg_weights is None
			else bbox_reg_weights)
		self.box_coder = det_utils.BoxCoder(coder_weights)

		# Box branch.
		self.box_roi_pool = box_roi_pool
		self.box_head = box_head
		self.box_predictor = box_predictor

		# Inference settings.
		self.score_thresh = score_thresh
		self.nms_thresh = nms_thresh
		self.detections_per_img = detections_per_img

		# Optional mask branch.
		self.mask_roi_pool = mask_roi_pool
		self.mask_head = mask_head
		self.mask_predictor = mask_predictor

		# Optional keypoint branch.
		self.keypoint_roi_pool = keypoint_roi_pool
		self.keypoint_head = keypoint_head
		self.keypoint_predictor = keypoint_predictor
 def __init__(self,
              score_thresh,
              nms_thresh,
              bbox_reg_weights,
              num_boxes_per_img):
     """Store the post-processing settings and build the box coder.

     Args:
         score_thresh: Stored as ``self.score_thresh``.
         nms_thresh: Stored as ``self.nms_thresh``.
         bbox_reg_weights: Passed to ``det_utils.BoxCoder``.
         num_boxes_per_img: Stored as ``self.num_boxes_per_img``.
     """
     super().__init__()
     self.score_thresh = score_thresh
     self.nms_thresh = nms_thresh
     self.num_boxes_per_img = num_boxes_per_img
     coder = det_utils.BoxCoder(bbox_reg_weights)
     self.box_coder = coder
    def __init__(
            self,
            box_roi_pool,
            box_head,
            box_predictor,
            # Faster R-CNN training
            fg_iou_thresh,
            bg_iou_thresh,
            batch_size_per_image,
            positive_fraction,
            bbox_reg_weights,
            # Faster R-CNN inference
            score_thresh,
            nms_thresh,
            detections_per_img,
            # Mask
            mask_coarse_head=None,
            mask_point_head=None,
    ):
        """Store the box branch and the coarse/point mask heads.

        Builds the proposal matcher, the fg/bg sampler and the box coder
        (weights default to ``(10., 10., 5., 5.)`` when ``bbox_reg_weights``
        is ``None``), then records a set of fixed mask-head settings.
        """
        super(PointRendRoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        coder_weights = (
            (10., 10., 5., 5.) if bbox_reg_weights is None
            else bbox_reg_weights)
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Coarse mask head; "0" corresponds to P2 in the FPN feature maps.
        self.mask_coarse_in_features = ("0",)
        self.mask_coarse_side_size = 14
        # Ratio of each FPN stage's feature size to the original image size.
        self._feature_scales = {
            '0': 0.25,
            '1': 0.125,
            '2': 0.0625,
            '3': 0.03125,
            '4': 0.015625,
        }
        self.mask_coarse_head = mask_coarse_head

        # Point head; "0" corresponds to P2 in the FPN.
        self.mask_point_in_features = ["0"]
        self.mask_point_train_num_points = 14 * 14
        self.mask_point_oversample_ratio = 3
        self.mask_point_importance_sample_ratio = 0.75

        # The next two parameters are used by the adaptive subdivision
        # inference procedure.
        self.mask_point_subdivision_steps = 5
        self.mask_point_subdivision_num_points = 28 * 28
        self.mask_point_head = mask_point_head
    def __init__(self, backbone, num_classes,
        min_size=800, max_size=1333,
        image_mean=None, image_std=None,
        anchor_generator=None, head=None,
        proposal_matcher=None,
        score_thresh=0.05,
        nms_thresh=0.5,
        detections_per_img=300,
        fg_iou_thresh=0.5, bg_iou_thresh=0.4,
        topk_candidates=1000):
        """Assemble a RetinaNet from a backbone plus optional components.

        Args:
            backbone: Feature extractor; must expose an ``out_channels``
                attribute.
            num_classes: Passed to the default ``RetinaNetHead``.
            min_size, max_size: Passed to ``GeneralizedRCNNTransform``.
            image_mean, image_std: Normalisation statistics; default to
                ``[0.485, 0.456, 0.406]`` and ``[0.229, 0.224, 0.225]``.
            anchor_generator: ``AnchorGenerator`` or ``None``. When ``None``,
                one is built with three sizes per level
                (``x``, ``x * 2**(1/3)``, ``x * 2**(2/3)`` truncated to int,
                for ``x`` in 32..512) and aspect ratios 0.5, 1.0 and 2.0.
            head: Prediction head; a ``RetinaNetHead`` is built when ``None``.
            proposal_matcher: Matcher; when ``None`` one is built from
                ``fg_iou_thresh`` / ``bg_iou_thresh`` with low-quality
                matches allowed.
            score_thresh, nms_thresh, detections_per_img, topk_candidates:
                Stored under attributes of the same name.

        Raises:
            ValueError: If ``backbone`` has no ``out_channels`` attribute.
            AssertionError: If ``anchor_generator`` is neither ``None`` nor
                an ``AnchorGenerator``.
        """
        super(RetinaNet, self).__init__()

        if not hasattr(backbone, "out_channels"):
            # Message fixed: the original read "assumed be the samefor all
            # the levels" (missing word and missing space).
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels "
                "(assumed to be the same for all the levels)")

        self.backbone = backbone

        assert isinstance(anchor_generator, (AnchorGenerator, type(None)))

        if anchor_generator is None:
            anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3))) for x in [32, 64, 128, 256, 512])
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        self.anchor_generator = anchor_generator

        if head is None:
            # The head is sized from the anchor count of the first level.
            head = RetinaNetHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0], num_classes)
        self.head = head

        if proposal_matcher is None:
            proposal_matcher = det_utils.Matcher(
                fg_iou_thresh,
                bg_iou_thresh,
                allow_low_quality_matches=True,
            )
        self.proposal_matcher = proposal_matcher

        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.topk_candidates = topk_candidates

        self.has_warned = False
Exemple #8
0
    def __init__(
            self,
            backbone: ImageEncoder,
            positive_fraction: float = 0.5,
            match_thresh_low: float = 0.3,
            match_thresh_high: float = 0.7,
            anchor_sizes: List[int] = (128, 256, 512),
            anchor_aspect_ratios: List[float] = (0.5, 1.0, 2.0),
            batch_size_per_image: int = 256,
            pre_nms_top_n: int = 6000,
            post_nms_top_n: int = 300,
            nms_thresh: float = 0.7,
            min_size: int = 0,
            fpn_post_nms_top_n: int = 1000,
            fpn_post_nms_per_batch: int = True,
            allow_low_quality_matches: bool = True,
            initializer: InitializerApplicator = InitializerApplicator(),
    ) -> None:
        """Build the RPN head, anchor generator, matcher, sampler and coder.

        Args:
            backbone: Stored as ``self.backbone``.
            positive_fraction: Second argument of the balanced sampler.
            match_thresh_low: Second positional argument of the matcher.
            match_thresh_high: First positional argument of the matcher.
            anchor_sizes: First argument of ``AnchorGenerator``.
            anchor_aspect_ratios: Second argument of ``AnchorGenerator``.
            batch_size_per_image: First argument of the balanced sampler.
            pre_nms_top_n: Stored as ``self.pre_nms_top_n``.
            post_nms_top_n: Stored as ``self.post_nms_top_n``.
            nms_thresh: Stored as ``self.nms_thresh``.
            min_size: Stored as ``self.min_size``.
            fpn_post_nms_top_n: Not referenced in this constructor.
            fpn_post_nms_per_batch: Not referenced in this constructor.
            allow_low_quality_matches: Forwarded to the matcher.
            initializer: Called with ``self`` as the last step.
        """
        # NOTE(review): `fpn_post_nms_per_batch` is annotated `int` but
        # defaults to `True`; confirm whether `bool` was intended.
        super(RPN, self).__init__(None)

        # NOTE(review): the head is built with fixed arguments (256, 3) while
        # `self.num_anchors` below is derived from the anchor generator;
        # confirm they agree for non-default anchor settings.
        self._rpn_head = RPNHead(256, 3)

        # Proposal-filtering settings.
        self.min_size = min_size
        self.pre_nms_top_n = pre_nms_top_n
        self.nms_thresh = nms_thresh
        self.post_nms_top_n = post_nms_top_n

        # Converts relative regression offsets into absolute coordinates.
        unit_weights = (1., 1., 1., 1.)
        self.box_coder = det_utils.BoxCoder(weights=unit_weights)

        # Selects `batch_size_per_image` anchor boxes for the loss so that
        # foreground and background labels stay reasonably balanced.
        self.sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        # Labels an anchor box foreground or background according to its
        # overlap with the nearest target box.
        self.proposal_matcher = det_utils.Matcher(
            match_thresh_high,
            match_thresh_low,
            allow_low_quality_matches=allow_low_quality_matches)

        self.backbone = backbone
        generator = AnchorGenerator(anchor_sizes, anchor_aspect_ratios)
        self.anchor_generator = generator
        self.num_anchors = generator.num_anchors_per_location()[0]

        self._loss_meters = {
            'rpn_cls_loss': Average(),
            'rpn_reg_loss': Average(),
        }

        initializer(self)
Exemple #9
0
    def __init__(self):
        """Build the box, relationship and sampling components from ``cfg``.

        All thresholds, batch sizes and the class count are read from the
        module-level ``cfg`` object; the pooler settings, the 256 input
        channels and the representation size 1024 are fixed here.
        """
        super(RoIHeads, self).__init__()

        representation_size = 1024

        self.box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

        pooled_side = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(256 * pooled_side**2, representation_size)
        # Independent copy of the freshly built box head.
        self.rlp_head = copy.deepcopy(self.box_head)

        self.box_predictor = FastRCNNPredictor(
            representation_size, cfg.BOX.NUM_CLASSES)

        # Input width is three times the box head's fc7 output (concat of SPO).
        spo_width = self.box_head.fc7.out_features * 3
        self.RelDN = reldn_heads.reldn_head(spo_width)

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        self.proposal_matcher = det_utils.Matcher(
            cfg.BOX.FG_IOU_THRESH,
            cfg.BOX.BG_IOU_THRESH,
            allow_low_quality_matches=False)

        make_sampler = det_utils.BalancedPositiveNegativeSampler
        self.fg_bg_sampler = make_sampler(
            cfg.BOX.BATCH_SIZE_PER_IMAGE, cfg.BOX.POSITIVE_FRACTION)
        self.fg_bg_sampler_so = make_sampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_SO, cfg.MODEL.POSITIVE_FRACTION_SO)
        self.fg_bg_sampler_rlp = make_sampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_REL,
            cfg.MODEL.POSITIVE_FRACTION_REL)

        self.box_coder = det_utils.BoxCoder((10., 10., 5., 5.))
Exemple #10
0
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # new
                 weight_loss=False,
                 use_context=False,
                 track_embedding=None):
        """Store the box branch, tracking options and training helpers.

        Builds the proposal matcher, the fg/bg sampler and the box coder;
        ``bbox_reg_weights`` defaults to ``(10., 10., 5., 5.)`` when ``None``.
        The remaining arguments are stored under attributes of the same name.
        """
        super(TrackHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.proposal_matcher = matcher

        sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = sampler

        coder_weights = (
            (10., 10., 5., 5.) if bbox_reg_weights is None
            else bbox_reg_weights)
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Tracking-related options.
        self.weight_loss = weight_loss
        self.use_context = use_context
        self.track_embedding = track_embedding
    def __init__(self,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights):
        """Store the box predictor and build matcher, sampler and box coder.

        ``bbox_reg_weights`` defaults to ``(10., 10., 5., 5.)`` when ``None``.
        """
        super(SSDHead, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.proposal_matcher = matcher

        sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = sampler

        coder_weights = (
            (10., 10., 5., 5.) if bbox_reg_weights is None
            else bbox_reg_weights)
        self.box_coder = det_utils.BoxCoder(coder_weights)

        self.box_predictor = box_predictor
    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
    ):
        """Store the cascade components and build per-stage matchers/coders.

        The number of cascade stages is ``len(box_head)``. One matcher and one
        box coder are built per stage, so ``fg_iou_thresh``, ``bg_iou_thresh``
        and ``bbox_reg_weights`` must be indexable with at least that many
        entries.

        Args:
            box_roi_pool, box_head, box_predictor: Box branch; ``box_head``
                must support ``len()``.
            fg_iou_thresh, bg_iou_thresh: Per-stage matcher thresholds.
            batch_size_per_image, positive_fraction: Arguments of the fg/bg
                sampler.
            bbox_reg_weights: Per-stage coder weights. When ``None``, three
                default weight tuples are used (enough for three stages).
            score_thresh, nms_thresh, detections_per_img: Stored under
                attributes of the same name.
            mask_*, keypoint_*: Optional branches, stored as given.

        Raises:
            IndexError: If a per-stage sequence has fewer entries than there
                are cascade stages.
        """
        super(CascadeRoIHeads, self).__init__()

        self.num_cascade_stages = len(box_head)
        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh[0],
            bg_iou_thresh[0],
            allow_low_quality_matches=False)

        # One matcher per stage. The original looped over a hard-coded
        # range(3) even though the stage count is derived from box_head.
        self.proposal_matchers = [
            det_utils.Matcher(
                fg_iou_thresh[stage],
                bg_iou_thresh[stage],
                allow_low_quality_matches=False)
            for stage in range(self.num_cascade_stages)
        ]

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = [(10., 10., 5., 5.), (20., 20., 10., 10.),
                                (30., 30., 15., 15.)]
        # One box coder per stage, matching the matchers above.
        self.box_coders = [
            det_utils.BoxCoder(bbox_reg_weights[stage])
            for stage in range(self.num_cascade_stages)
        ]

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor
    def __init__(self,
                 out_channels,
                 num_classes,
                 input_mode,
                 acf_head,
                 fg_iou_thresh=0.5,
                 bg_iou_thresh=0.5,
                 batch_size_per_image=512,
                 positive_fraction=0.25,
                 bbox_reg_weights=None,
                 box_score_thresh=0.05,
                 box_nms_thresh=0.5,
                 box_detections_per_img=100):
        """Build the detection, mask, PAF and (RGB-D only) keypoint heads.

        Args:
            out_channels: Stored as ``self.in_channels``; used as the input
                width of the box, mask and PAF heads.
            num_classes: Class count. The PAF and keypoint predictors are
                sized as multiples of ``num_classes - 1``.
            input_mode: Compared with ``config.INPUT_RGBD``; the attention
                block and keypoint heads are built only when they are equal.
            acf_head: ``'endpoints'``, ``'scatters'`` or ``'norm_vector'``.
                Only inspected in RGB-D mode.
            fg_iou_thresh, bg_iou_thresh: Matcher thresholds.
            batch_size_per_image, positive_fraction: Sampler arguments.
            bbox_reg_weights: Box coder weights; ``(10., 10., 5., 5.)`` when
                ``None``.
            box_score_thresh: Stored as ``self.score_thresh``.
            box_nms_thresh: Stored as ``self.nms_thresh``.
            box_detections_per_img: Stored as ``self.detections_per_img``.

        Raises:
            ValueError: In RGB-D mode, when ``acf_head`` is not one of the
                three supported names.
        """
        super(RoIHeadsExtend, self).__init__()

        self.in_channels = out_channels
        self.input_mode = input_mode
        self.score_thresh = box_score_thresh
        self.nms_thresh = box_nms_thresh
        self.detections_per_img = box_detections_per_img
        self.fg_iou_thresh = fg_iou_thresh
        self.bg_iou_thresh = bg_iou_thresh
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction
        self.num_classes = num_classes

        # Detection
        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                               output_size=7,
                                               sampling_ratio=2)

        representation_size = 1024
        resolution = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(out_channels * resolution**2,
                                   representation_size)

        self.box_predictor = FastRCNNPredictor(representation_size,
                                               num_classes)

        # Segmentation
        self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                                  output_size=14,
                                                  sampling_ratio=2)

        mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
        mask_dilation = 1
        self.mask_head = MaskRCNNHeads(out_channels, mask_layers,
                                       mask_dilation)

        mask_predictor_in_channels = 256  # == mask_layers[-1]
        mask_dim_reduced = 256
        self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                mask_dim_reduced, num_classes)

        # The PAF branch is always enabled here; it mirrors the mask branch
        # with 2 * (num_classes - 1) output channels.
        self.with_paf_branch = True
        if self.with_paf_branch:
            self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                          mask_dilation)
            self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                   mask_dim_reduced,
                                                   2 * (num_classes - 1))

        if self.input_mode == config.INPUT_RGBD:
            self.attention_block = ContextBlock(256, 2)
            self.global_feature_dim = 256
            self.with_3d_keypoints = True
            self.with_axis_keypoints = False
            self.regress_axis = False
            self.estimate_norm_vector = False
            # Exactly one of the three optional heads is enabled by acf_head.
            if acf_head == 'endpoints':
                self.with_axis_keypoints = True
            elif acf_head == 'scatters':
                self.regress_axis = True
            elif acf_head == 'norm_vector':
                self.estimate_norm_vector = True
            else:
                # Raise instead of print() + exit(): exit() terminated the
                # whole process with a success status and could not be
                # handled by the caller.
                raise ValueError(
                    "Invalid acf_head %r; expected 'endpoints', 'scatters' "
                    "or 'norm_vector'" % (acf_head,))
            keypoint_layers = (256, ) * 4
            self.keypoint_dim_reduced = keypoint_layers[-1]
            if self.with_3d_keypoints:
                self.vote_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
            if self.with_axis_keypoints:
                self.orientation_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")

                self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 6 * (num_classes - 1))

            if self.regress_axis:
                self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                                   keypoint_layers, "conv2d")
                self.axis_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 4 * (num_classes - 1))

            if self.estimate_norm_vector:
                self.norm_vector_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.norm_vector_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
Exemple #14
0
import torch
import pdetection.box as box
import torchvision.models.detection._utils as util
import random

# Ad-hoc comparison of the project's BoxCoder.decode against the torchvision
# reference implementation, both configured with weights [2, 2, 2, 2].
coder_ref = util.BoxCoder([2, 2, 2, 2])
# Tensors are created on CUDA, so this script needs a GPU as written.
device = torch.device('cuda')
coder = box.BoxCoder([2, 2, 2, 2])
# Each input is a one-element list holding a 4x4 float32 tensor of random
# integer values in [0, 100).
# NOTE(review): presumably `out` holds the relative codes and `inp` the
# reference boxes, matching the decode() argument order; confirm.
out = [torch.randint(0, 100, (4, 4), dtype=torch.float32, device=device)]
inp = [torch.randint(0, 100, (4, 4), dtype=torch.float32, device=device)]
# Print both decoded results: the reference output is squeezed, the project
# output is indexed with [0].
print(coder_ref.decode(out, inp).squeeze(), '\n', coder.decode(out, inp)[0])
# Print the shapes of the two results.
print(
    coder_ref.decode(out, inp).squeeze().shape, '\n',
    coder.decode(out, inp)[0].shape)
# Print the sum of the element-wise difference between the two results.
print(
    torch.sum(
        (coder_ref.decode(out, inp).squeeze() - coder.decode(out, inp)[0])))
# Print the element-wise difference, clamped to [-999, 999].
print((coder_ref.decode(out, inp).squeeze() - coder.decode(out, inp)[0]).clamp(
    min=-999, max=999))
Exemple #15
0
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # Faster R-CNN inference
                 score_thresh,
                 nms_thresh,
                 detections_per_img,
                 out_channels,
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 keypoint_roi_pool=None,
                 keypoint_head=None,
                 keypoint_predictor=None,
                 pose_mean=None,
                 pose_stddev=None,
                 threed_68_points=None,
                 threed_5_points=None,
                 bbox_x_factor=1.1,
                 bbox_y_factor=1.1,
                 expand_forehead=0.3):
        """Store the RoI-head components and add a two-class classifier branch.

        In addition to the supplied box branch, a separate ``class_roi_pool``,
        ``class_head`` and ``class_predictor`` are built here. The class head
        input width is ``out_channels * r**2`` where ``r`` is taken from
        ``box_roi_pool.output_size[0]``.

        ``bbox_reg_weights`` defaults to ``(10.0, 10.0, 5.0, 5.0)`` when
        ``None``. All remaining arguments are stored under attributes of the
        same name.
        """
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.proposal_matcher = matcher

        sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = sampler

        coder_weights = (
            (10.0, 10.0, 5.0, 5.0) if bbox_reg_weights is None
            else bbox_reg_weights)
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Classification branch with a fixed class count of 2.
        num_classes = 2
        representation_size = 1024
        self.class_roi_pool = MultiScaleRoIAlign(
            featmap_names=["0", "1", "2", "3"],
            output_size=7,
            sampling_ratio=2)
        # The side length is read from the supplied box pooler, not from
        # class_roi_pool.
        pooled_side = box_roi_pool.output_size[0]
        self.class_head = TwoMLPHead(
            out_channels * pooled_side**2, representation_size)
        self.class_predictor = FastRCNNClassPredictor(
            representation_size, num_classes)

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Optional mask branch.
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        # Optional keypoint branch.
        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        # Pose statistics and 3D reference points.
        self.pose_mean = pose_mean
        self.pose_stddev = pose_stddev
        self.threed_68_points = threed_68_points
        self.threed_5_points = threed_5_points

        # Bounding-box expansion settings.
        self.bbox_x_factor = bbox_x_factor
        self.bbox_y_factor = bbox_y_factor
        self.expand_forehead = expand_forehead
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # Faster R-CNN inference
                 score_thresh,
                 nms_thresh,
                 detections_per_img,
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 keypoint_roi_pool=None,
                 keypoint_head=None,
                 keypoint_predictor=None,
                 # new
                 use_soft_nms=False,
                 weight_loss=False,
                 use_context=False,
                 use_track_branch=False,
                 track_embedding=None):
        """Store the RoI-head components, option flags and training helpers.

        Builds the proposal matcher, the fg/bg sampler and the box coder;
        ``bbox_reg_weights`` defaults to ``(10., 10., 5., 5.)`` when ``None``.

        NOTE(review): ``use_track_branch`` and ``track_embedding`` are
        accepted but not referenced in the visible body; confirm whether
        they should be stored.
        """
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns ground-truth boxes to each proposal.
        matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.proposal_matcher = matcher

        sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = sampler

        coder_weights = (
            (10., 10., 5., 5.) if bbox_reg_weights is None
            else bbox_reg_weights)
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Optional mask branch.
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        # Optional keypoint branch.
        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        # Option flags.
        self.weight_loss = weight_loss
        self.use_soft_nms = use_soft_nms
        self.use_context = use_context
Exemple #17
0
 def __init__(self):
     """Create the box coder with a weight of 1.0 for each of its four terms."""
     unit_weights = (1.0, 1.0, 1.0, 1.0)
     self.box_coder = det_utils.BoxCoder(weights=unit_weights)