Example #1
def segm_resnet50(segm_input_dim=(256, 256),
                  segm_inter_dim=(256, 256),
                  backbone_pretrained=True,
                  topk_pos=3,
                  topk_neg=3,
                  mixer_channels=2):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # segmentation dimensions (these hard-coded values override the
    # segm_input_dim and segm_inter_dim arguments above)
    segm_input_dim = (64, 256, 512, 1024)  # channel widths of conv1, layer1, layer2, layer3
    segm_inter_dim = (4, 16, 32, 64)
    segm_dim = (64, 64)  # convolutions before cosine similarity

    # segmentation
    segm_predictor = segmmodels.SegmNet(segm_input_dim=segm_input_dim,
                                        segm_inter_dim=segm_inter_dim,
                                        segm_dim=segm_dim,
                                        topk_pos=topk_pos,
                                        topk_neg=topk_neg,
                                        mixer_channels=mixer_channels)

    net = SegmNet(feature_extractor=backbone_net,
                  segm_predictor=segm_predictor,
                  segm_layers=['conv1', 'layer1', 'layer2', 'layer3'],
                  extractor_grad=False)  # keep the backbone frozen

    return net
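The hard-coded segm_input_dim above follows the channel widths of the four ResNet-50 stages that are later requested through segm_layers. As a quick sanity check, here is a small standalone sketch that reproduces those widths; it uses torchvision's ResNet-50 as a stand-in for backbones.resnet50, which is an assumption about the backbone's structure, not part of this repository.

import torch
import torchvision

# Standalone sketch: verify the (64, 256, 512, 1024) channel widths used above.
# torchvision's resnet50 is only a stand-in for backbones.resnet50.
resnet = torchvision.models.resnet50(weights=None)

x = torch.randn(1, 3, 224, 224)
c1 = resnet.relu(resnet.bn1(resnet.conv1(x)))   # 'conv1'  -> 64 channels
l1 = resnet.layer1(resnet.maxpool(c1))          # 'layer1' -> 256 channels
l2 = resnet.layer2(l1)                          # 'layer2' -> 512 channels
l3 = resnet.layer3(l2)                          # 'layer3' -> 1024 channels

print([f.shape[1] for f in (c1, l1, l2, l3)])   # [64, 256, 512, 1024]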
Example #2
def drnet_resnet50(iou_input_dim=(512, 1024),
                   iou_inter_dim=(256, 256),
                   backbone_pretrained=True):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)
    if backbone_pretrained:
        # environment-specific detector checkpoint; adjust the path as needed
        mod = torch.load(
            '/mnt/lustre/baishuai/experiment/pytracking_networks/rpn_r50_c4_2x-3d4c1e14.pth'
        )['state_dict']
        model_dict = backbone_net.state_dict()
        pretrained_dict = {}
        for k, v in mod.items():
            # drop the leading module prefix (e.g. 'backbone.') from each checkpoint key
            name = '.'.join(k.split('.')[1:])
            # keep only keys that exist in the backbone and do not belong to the RPN head
            if name in model_dict and k.split('.')[0] != "rpn_head":
                pretrained_dict[name] = v

        # pretrained_dict =  {k: v for k, v in other_state_dict.items() if k in model_dict and k.split('.')[0] != "mask_head"}

        model_dict.update(pretrained_dict)

        backbone_net.load_state_dict(model_dict, strict=True)

    # Bounding box regressor
    iou_predictor = bbmodels.DirectReg(input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    net = DRNet(feature_extractor=backbone_net,
                bb_regressor=iou_predictor,
                bb_regressor_layer=['layer2', 'layer3'],
                extractor_grad=False)

    return net
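The loading block above follows a common pattern: strip the first component from each checkpoint key (e.g. 'backbone.conv1.weight' becomes 'conv1.weight'), drop everything belonging to the detector's rpn_head, and only then update and load the backbone's own state_dict. Below is a minimal, self-contained sketch of the same pattern on toy modules; the key names and tensor shapes are made up for illustration and are not taken from the actual checkpoint.

import torch
import torch.nn as nn

# Toy detector checkpoint: one backbone key and one RPN-head key (made-up names/shapes).
detector_state = {
    'backbone.conv.weight': torch.randn(8, 3, 3, 3),
    'rpn_head.cls.weight': torch.randn(2, 8),
}

# Toy target backbone whose own keys are 'conv.weight' and 'conv.bias'.
backbone = nn.Sequential()
backbone.add_module('conv', nn.Conv2d(3, 8, kernel_size=3))

model_dict = backbone.state_dict()
pretrained_dict = {}
for k, v in detector_state.items():
    name = '.'.join(k.split('.')[1:])                    # strip the leading prefix
    if name in model_dict and k.split('.')[0] != 'rpn_head':
        pretrained_dict[name] = v                        # keep only matching backbone keys

model_dict.update(pretrained_dict)                       # the bias stays at its initialized value
backbone.load_state_dict(model_dict, strict=True)
print(sorted(pretrained_dict))                           # ['conv.weight']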
Example #3
def SBDT_resnet50(input_dim=(512, 1024),
                  locator_inter_dim=(128, 256),
                  iou_input_dim=(256, 256),
                  iou_inter_dim=(256, 256),
                  backbone_pretrained=True):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # Bounding box regressor
    iou_predictor = bbmodels.AtomIoUNet(input_dim=input_dim,
                                        pred_input_dim=iou_input_dim,
                                        pred_inter_dim=iou_inter_dim)

    # locator
    location_predictor = locmodels.OnlineRRNet50(
        input_dim=input_dim, pred_input_dim=locator_inter_dim)

    # SBDTNet
    net = SBDTNet(feature_extractor=backbone_net,
                  feature_layer=['layer2', 'layer3'],
                  bb_regressor=iou_predictor,
                  location_predictor=location_predictor,
                  extractor_grad=False)

    return net
Example #4
    def __init__(self,
                 output_layers,
                 pretrained,
                 frozen_layers):
        backbone = backbones.resnet50(output_layers=output_layers, pretrained=pretrained,
                                      frozen_layers=frozen_layers)
        num_channels = 1024
        super().__init__(backbone, num_channels)
Example #5
def steepest_descent_learn_filter_resnet50_newiou(filter_size=1, optim_iter=3, optim_init_step=1.0, optim_init_reg=0.01,
                                                  output_activation=None, classification_layer='layer3',
                                                  backbone_pretrained=False, clf_feat_blocks=1, clf_feat_norm=True,
                                                  init_filter_norm=False, final_conv=False, out_feature_dim=256,
                                                  init_gauss_sigma=1.0, num_dist_bins=5, bin_displacement=1.0,
                                                  test_loss=None, mask_init_factor=4.0, iou_input_dim=(256, 256),
                                                  iou_inter_dim=(256, 256), jitter_sigma_factor=None):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    
    # classifier
    clf_feature_extractor = clf_features.residual_bottleneck_comb(num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
                                                                  final_conv=final_conv, norm_scale=norm_scale,
                                                                  out_dim=out_feature_dim)
    initializer = clf_initializer.FilterInitializerLinear(filter_size=filter_size, filter_norm=init_filter_norm, feature_dim=out_feature_dim)
    optimizer = clf_optimizer.SteepestDescentLearn(num_iter=optim_iter, filter_size=filter_size, init_step_length=optim_init_step,
                                                   init_filter_reg=optim_init_reg, feature_dim=out_feature_dim,
                                                   init_gauss_sigma=init_gauss_sigma, num_dist_bins=num_dist_bins,
                                                   bin_displacement=bin_displacement, test_loss=test_loss, mask_init_factor=mask_init_factor)
    classifier = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                         filter_optimizer=optimizer, feature_extractor=clf_feature_extractor,
                                         output_activation=output_activation, jitter_sigma_factor=jitter_sigma_factor)    
    # Bounding box regressor
    # combine RGB and TIR by 2*
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim)
    # load pretrained model (environment-specific checkpoint path)
    pretrainmodel_path = '/home/lichao/projects/pytracking_lichao/pytracking/DiMP_nets/sdlearn_300_onlytestloss_lr_causal_mg30_iou_nocf_res50_lfilt512_coco/OptimTracker_ep0040.pth.tar'
    pretrainmodel = loading.torch_load_legacy(pretrainmodel_path)['net']
    usepretrain = True
    updback = True
    updcls = True
    updbb = True

    if usepretrain:
        if updback:
            # update backbone
            backbone_dict = backbone_net.state_dict()
            pretrain_dict = {k[len('feature_extractor.'):]: v for k, v in pretrainmodel.items()
                             if k[len('feature_extractor.'):] in backbone_dict}
            backbone_net.load_state_dict(pretrain_dict)

        if updcls:
            # update classifier: duplicate the first feature-extractor conv weights
            # along the input-channel dimension so the RGB filter also covers TIR
            pretrainmodel['classifier.feature_extractor.0.weight'] = torch.cat(
                (pretrainmodel['classifier.feature_extractor.0.weight'],
                 pretrainmodel['classifier.feature_extractor.0.weight']), 1)
            classifier_dict = classifier.state_dict()
            pretrain_dict = {k[len('classifier.'):]: v for k, v in pretrainmodel.items()
                             if k[len('classifier.'):] in classifier_dict}
            classifier.load_state_dict(pretrain_dict)
        if updbb:
            # update bounding box regressor
            bb_regressor_dict = bb_regressor.state_dict()
            pretrain_dict = {k[len('bb_regressor.'):]: v for k, v in pretrainmodel.items()
                             if k[len('bb_regressor.'):] in bb_regressor_dict}
            bb_regressor.load_state_dict(pretrain_dict)

    net = OptimTracker(feature_extractor=backbone_net, classifier=classifier, bb_regressor=bb_regressor,
                       classification_layer=classification_layer, bb_regressor_layer=['layer2', 'layer3'])
    return net
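Two patterns in the loading code above are worth isolating: sub-module weights are pulled out of the full tracker checkpoint by stripping a prefix such as 'classifier.' or 'bb_regressor.', and the classifier's first conv is adapted from RGB-only to RGB+TIR input by concatenating its weight tensor with itself along the input-channel dimension. Here is a minimal sketch of that channel-doubling step on a plain conv layer; the channel counts are illustrative and not taken from the checkpoint.

import torch
import torch.nn as nn

# A conv trained on 256 input channels (RGB features) is re-used for 512
# input channels (RGB + TIR features stacked) by duplicating its weights
# along dim=1, like the torch.cat((w, w), 1) call above.
rgb_conv = nn.Conv2d(256, 64, kernel_size=3, padding=1)
rgbt_conv = nn.Conv2d(512, 64, kernel_size=3, padding=1)

with torch.no_grad():
    rgbt_conv.weight.copy_(torch.cat((rgb_conv.weight, rgb_conv.weight), dim=1))
    rgbt_conv.bias.copy_(rgb_conv.bias)

x = torch.randn(1, 512, 18, 18)
print(rgbt_conv(x).shape)   # torch.Size([1, 64, 18, 18])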
Example #6
def dimpnet50(filter_size=1, optim_iter=5, optim_init_step=1.0, optim_init_reg=0.01,
              classification_layer='layer3', feat_stride=16, backbone_pretrained=True, clf_feat_blocks=0,
              clf_feat_norm=True, init_filter_norm=False, final_conv=True,
              out_feature_dim=512, init_gauss_sigma=1.0, num_dist_bins=5, bin_displacement=1.0,
              mask_init_factor=4.0, iou_input_dim=(256, 256), iou_inter_dim=(256, 256),
              score_act='relu', act_param=None, target_mask_act='sigmoid',
              detach_length=float('Inf'), frozen_backbone_layers=()):

    # Backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained, frozen_layers=frozen_backbone_layers)

    # Feature normalization
    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # Classifier features
    if classification_layer == 'layer3':
        feature_dim = 256
    elif classification_layer == 'layer4':
        feature_dim = 512
    else:
        raise Exception

    clf_feature_extractor = clf_features.residual_bottleneck(feature_dim=feature_dim,
                                                             num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
                                                             final_conv=final_conv, norm_scale=norm_scale,
                                                             out_dim=out_feature_dim)

    # Initializer for the DiMP classifier
    initializer = clf_initializer.FilterInitializerLinear(filter_size=filter_size, filter_norm=init_filter_norm,
                                                          feature_dim=out_feature_dim)

    # Optimizer for the DiMP classifier
    optimizer = clf_optimizer.DiMPSteepestDescentGN(num_iter=optim_iter, feat_stride=feat_stride,
                                                    init_step_length=optim_init_step,
                                                    init_filter_reg=optim_init_reg, init_gauss_sigma=init_gauss_sigma,
                                                    num_dist_bins=num_dist_bins,
                                                    bin_displacement=bin_displacement,
                                                    mask_init_factor=mask_init_factor,
                                                    score_act=score_act, act_param=act_param, mask_act=target_mask_act,
                                                    detach_length=detach_length)

    # Transformer
    init_transformer = transformer.Transformer(d_model=512, nhead=1, num_layers=1)

    # The classifier module
    classifier = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                         filter_optimizer=optimizer, feature_extractor=clf_feature_extractor, transformer=init_transformer)

    # Bounding box regressor
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim)

    # DiMP network
    net = DiMPnet(feature_extractor=backbone_net, classifier=classifier, bb_regressor=bb_regressor,  
                  classification_layer=classification_layer, bb_regressor_layer=['layer2', 'layer3'])
    return net
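The transformer.Transformer(d_model=512, nhead=1, num_layers=1) module above is specific to this repository. As a rough, plain-PyTorch analogue of that configuration (single layer, single head, 512-dim tokens), one could write the shape-level sketch below; the repo's module may differ in details such as cross-attention or positional handling, so this is only an illustration.

import torch
import torch.nn as nn

# Rough analogue only: a one-layer, one-head encoder over 512-dim tokens.
layer = nn.TransformerEncoderLayer(d_model=512, nhead=1, batch_first=True)
encoder = nn.TransformerEncoder(layer, num_layers=1)

tokens = torch.randn(2, 18 * 18, 512)   # (batch, flattened feature-map cells, channels)
print(encoder(tokens).shape)            # torch.Size([2, 324, 512])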
Example #7
def atom_resnet50(iou_input_dim=(256,256), iou_inter_dim=(256,256), backbone_pretrained=True):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # Bounding box regressor
    # input_dim=(4*128, 4*256) = (512, 1024), the channel counts of ResNet-50 'layer2' and 'layer3'
    iou_predictor = bbmodels.AtomIoUNet(input_dim=(4 * 128, 4 * 256), pred_input_dim=iou_input_dim,
                                        pred_inter_dim=iou_inter_dim)

    net = ATOMnet(feature_extractor=backbone_net, bb_regressor=iou_predictor, bb_regressor_layer=['layer2', 'layer3'],
                  extractor_grad=False)

    return net
Example #8
def klcedimpnet50(filter_size=1, optim_iter=5, optim_init_step=1.0, optim_init_reg=0.01,
                  classification_layer='layer3', feat_stride=16, backbone_pretrained=True, clf_feat_blocks=0,
                  clf_feat_norm=True, init_filter_norm=False, final_conv=True,
                  out_feature_dim=512, gauss_sigma=1.0,
                  iou_input_dim=(256, 256), iou_inter_dim=(256, 256),
                  detach_length=float('Inf'), alpha_eps=0.0, train_feature_extractor=True,
                  init_uni_weight=None, optim_min_reg=1e-3, init_initializer='default', normalize_label=False,
                  label_shrink=0, softmax_reg=None, label_threshold=0, final_relu=False, frozen_backbone_layers=()):

    if not train_feature_extractor:
        frozen_backbone_layers = 'all'

    # Backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained, frozen_layers=frozen_backbone_layers)

    # Feature normalization
    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # Classifier features
    clf_feature_extractor = clf_features.residual_bottleneck(num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
                                                             final_conv=final_conv, norm_scale=norm_scale,
                                                             out_dim=out_feature_dim, final_relu=final_relu)

    # Initializer for the DiMP classifier
    initializer = clf_initializer.FilterInitializerLinear(filter_size=filter_size, filter_norm=init_filter_norm,
                                                          feature_dim=out_feature_dim, init_weights=init_initializer)

    # Optimizer for the DiMP classifier
    optimizer = clf_optimizer.PrDiMPSteepestDescentNewton(num_iter=optim_iter, feat_stride=feat_stride,
                                                          init_step_length=optim_init_step,
                                                          init_filter_reg=optim_init_reg, gauss_sigma=gauss_sigma,
                                                          detach_length=detach_length, alpha_eps=alpha_eps,
                                                          init_uni_weight=init_uni_weight,
                                                          min_filter_reg=optim_min_reg, normalize_label=normalize_label,
                                                          label_shrink=label_shrink, softmax_reg=softmax_reg,
                                                          label_threshold=label_threshold)

    # The classifier module
    classifier = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                         filter_optimizer=optimizer, feature_extractor=clf_feature_extractor)

    # Bounding box regressor
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim)

    # DiMP network
    net = DiMPnet(feature_extractor=backbone_net, classifier=classifier, bb_regressor=bb_regressor,
                  classification_layer=classification_layer, bb_regressor_layer=['layer2', 'layer3'])
    return net
Example #9
File: DRNet.py  Project: danielism97/CFKD
def drnet_resnet50(iou_input_dim=(512, 1024),
                   iou_inter_dim=(256, 256),
                   backbone_pretrained=True):
    # backbone
    backbone_net = backbones.resnet50(
        output_layers=['conv1', 'layer1', 'layer2', 'layer3'],
        pretrained=backbone_pretrained)

    # Bounding box regressor
    iou_predictor = bbmodels.DirectReg(input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    net = DRNet(feature_extractor=backbone_net,
                bb_regressor=iou_predictor,
                bb_regressor_layer=['layer2', 'layer3'],
                extractor_grad=False,
                regressor_grad=False)

    return net
Example #10
def atom_resnet50_mul_fpn(iou_input_dim=(256, 256),
                          iou_inter_dim=(256, 256),
                          backbone_pretrained=True,
                          share_rt=False):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # Bounding box regressor
    iou_predictor = bbmodels.AtomMulFPNIoUNet(
        input_dim=(512, 1024),
        pred_input_dim=iou_input_dim,
        pred_inter_dim=iou_inter_dim,
        share_rt=share_rt,
    )

    net = ATOMnet(feature_extractor=backbone_net,
                  bb_regressor=iou_predictor,
                  bb_regressor_layer=['layer2', 'layer3'],
                  extractor_grad=False)

    return net
Example #11
def depth_atom_resnet50(iou_input_dim=(256, 256),
                        iou_inter_dim=(256, 256),
                        backbone_pretrained=True):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # depthNet
    depth_net = depth.depthResnet50()

    # Bounding box regressor
    iou_predictor = depthModels.DepthAtomIoUNet(input_dim=(4 * 256, 4 * 512),
                                                pred_input_dim=iou_input_dim,
                                                pred_inter_dim=iou_inter_dim)

    net = DepthATOMnet(feature_extractor=backbone_net,
                       depth_feature_extractor=depth_net,
                       bb_regressor=iou_predictor,
                       bb_regressor_layer=['layer2', 'layer3'],
                       extractor_grad=False)

    return net
Example #12
def dimpnet50(filter_size=1,
              optim_iter=5,
              optim_init_step=1.0,
              optim_init_reg=0.01,
              classification_layer='layer3',
              feat_stride=16,
              backbone_pretrained=True,
              clf_feat_blocks=0,
              clf_feat_norm=True,
              init_filter_norm=False,
              final_conv=True,
              out_feature_dim=512,
              init_gauss_sigma=1.0,
              num_dist_bins=5,
              bin_displacement=1.0,
              mask_init_factor=4.0,
              iou_input_dim=(256, 256),
              iou_inter_dim=(256, 256),
              score_act='relu',
              act_param=None,
              target_mask_act='sigmoid',
              detach_length=float('Inf'),
              settings=None):  # settings object passed through to the sub-modules below
    # Backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    # Feature normalization
    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # Classifier features
    clf_feature_extractor = clf_features.residual_bottleneck(
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=norm_scale,
        out_dim=out_feature_dim)

    # Initializer for the DiMP classifier
    initializer = clf_initializer.FilterInitializerLinear(
        settings=settings,
        filter_size=filter_size,
        filter_norm=init_filter_norm,
        feature_dim=out_feature_dim)

    # Optimizer for the DiMP classifier
    optimizer = clf_optimizer.DiMPSteepestDescentGN(
        settings=settings,
        num_iter=optim_iter,
        feat_stride=feat_stride,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        mask_init_factor=mask_init_factor,
        score_act=score_act,
        act_param=act_param,
        mask_act=target_mask_act,
        detach_length=detach_length)
    print(
        'Song in ltr.models.tracking.DiMPnet_rgbd_blend1.py line 233, before classifier, target_clf.LinearFilter ...'
    )
    # The classifier module
    classifier = target_clf.LinearFilter(
        settings=settings,
        filter_size=filter_size,
        filter_initializer=initializer,
        filter_optimizer=optimizer,
        feature_extractor=clf_feature_extractor)
    # Bounding box regressor for rgb
    bb_regressor = bbmodels.AtomIoUNet(settings=settings,
                                       input_dim=(4 * 128, 4 * 256),
                                       pred_input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)
    print(
        'Song in ltr.models.tracking.DiMPnet_rgbd_blend1.py line 240, dimpnet50 model_constructor ...'
    )
    # DiMP network
    net = DiMPnet_rgbd_blend1(settings=settings,
                              feature_extractor=backbone_net,
                              classifier=classifier,
                              bb_regressor=bb_regressor,
                              classification_layer=classification_layer,
                              bb_regressor_layer=['layer2', 'layer3'])
    return net
Example #13
def steepest_descent_resnet50(filter_size=1,
                              num_filters=1,
                              optim_iter=3,
                              optim_init_reg=0.01,
                              backbone_pretrained=False,
                              clf_feat_blocks=1,
                              clf_feat_norm=True,
                              final_conv=False,
                              out_feature_dim=512,
                              target_model_input_layer='layer3',
                              decoder_input_layers=(
                                  "layer4",
                                  "layer3",
                                  "layer2",
                                  "layer1",
                              ),
                              detach_length=float('Inf'),
                              label_encoder_dims=(1, 1),
                              frozen_backbone_layers=(),
                              decoder_mdim=64,
                              filter_groups=1,
                              use_bn_in_label_enc=True,
                              dilation_factors=None,
                              backbone_type='imagenet'):
    # backbone feature extractor F
    if backbone_type == 'imagenet':
        backbone_net = backbones.resnet50(pretrained=backbone_pretrained,
                                          frozen_layers=frozen_backbone_layers)
    elif backbone_type == 'mrcnn':
        backbone_net = mrcnn_backbones.resnet50(
            pretrained=False, frozen_layers=frozen_backbone_layers)
    else:
        raise Exception

    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    layer_channels = backbone_net.out_feature_channels()

    # Extracts features input to the target model
    target_model_feature_extractor = clf_features.residual_basic_block(
        feature_dim=layer_channels[target_model_input_layer],
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=norm_scale,
        out_dim=out_feature_dim)

    # Few-shot label generator and weight predictor
    label_encoder = seg_label_encoder.ResidualDS16SW(
        layer_dims=label_encoder_dims + (num_filters, ),
        use_bn=use_bn_in_label_enc)

    # Predicts initial target model parameters
    initializer = seg_initializer.FilterInitializerZero(
        filter_size=filter_size,
        num_filters=num_filters,
        feature_dim=out_feature_dim,
        filter_groups=filter_groups)

    # Computes few-shot learning loss
    residual_module = loss_residual_modules.LWTLResidual(
        init_filter_reg=optim_init_reg,
        filter_dilation_factors=dilation_factors)

    # Iteratively updates the target model parameters by minimizing the few-shot learning loss
    optimizer = steepestdescent.GNSteepestDescent(
        residual_module=residual_module,
        num_iter=optim_iter,
        detach_length=detach_length,
        residual_batch_dim=1,
        compute_losses=True)

    # Target model and Few-shot learner
    target_model = target_clf.LinearFilter(
        filter_size=filter_size,
        filter_initializer=initializer,
        filter_optimizer=optimizer,
        feature_extractor=target_model_feature_extractor,
        filter_dilation_factors=dilation_factors)

    # Decoder
    decoder_input_layers_channels = {
        L: layer_channels[L]
        for L in decoder_input_layers
    }

    decoder = lwtl_decoder.LWTLDecoder(num_filters,
                                       decoder_mdim,
                                       decoder_input_layers_channels,
                                       use_bn=True)

    net = LWTLNet(feature_extractor=backbone_net,
                  target_model=target_model,
                  decoder=decoder,
                  label_encoder=label_encoder,
                  target_model_input_layer=target_model_input_layer,
                  decoder_input_layers=decoder_input_layers)
    return net
Example #14
    def __init__(self, settings=None, filter_size=1, num_filters=1, optim_iter=3, optim_init_reg=0.01,
                 backbone_pretrained=False, clf_feat_blocks=1,
                 clf_feat_norm=True, final_conv=False,
                 out_feature_dim=512,
                 target_model_input_layer='layer3',
                 decoder_input_layers=("layer4", "layer3", "layer2", "layer1",),
                 detach_length=float('Inf'),
                 label_encoder_dims=(1, 1),
                 frozen_backbone_layers=(),
                 decoder_mdim=64, filter_groups=1,
                 use_bn_in_label_enc=True,
                 dilation_factors=None,
                 backbone_type='imagenet'):
        super().__init__()
        if settings is None:
            raise Exception("settings cannot be None")
        self.settings = settings
        ############## BUILD NET ###################
        # backbone feature extractor F
        if backbone_type == 'imagenet':
            backbone_net = backbones.resnet50(pretrained=backbone_pretrained, frozen_layers=frozen_backbone_layers)
        elif backbone_type == 'mrcnn':
            backbone_net = mrcnn_backbones.resnet50(pretrained=False, frozen_layers=frozen_backbone_layers)
        else:
            raise Exception

        norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))
        layer_channels = backbone_net.out_feature_channels()

        # Extracts features input to the target model
        target_model_feature_extractor = clf_features.residual_basic_block(
            feature_dim=layer_channels[target_model_input_layer],
            num_blocks=clf_feat_blocks, l2norm=clf_feat_norm,
            final_conv=final_conv, norm_scale=norm_scale,
            out_dim=out_feature_dim)

        # Few-shot label generator and weight predictor
        label_encoder = seg_label_encoder.ResidualDS16SW(layer_dims=label_encoder_dims + (num_filters,),
                                                         use_bn=use_bn_in_label_enc)

        # Predicts initial target model parameters
        initializer = seg_initializer.FilterInitializerZero(filter_size=filter_size, num_filters=num_filters,
                                                            feature_dim=out_feature_dim, filter_groups=filter_groups)

        # Computes few-shot learning loss
        residual_module = loss_residual_modules.LWTLResidual(init_filter_reg=optim_init_reg,
                                                             filter_dilation_factors=dilation_factors)

        # Iteratively updates the target model parameters by minimizing the few-shot learning loss
        optimizer = steepestdescent.GNSteepestDescent(residual_module=residual_module, num_iter=optim_iter,
                                                      detach_length=detach_length,
                                                      residual_batch_dim=1, compute_losses=True)

        # Target model and Few-shot learner
        target_model = target_clf.LinearFilter(filter_size=filter_size, filter_initializer=initializer,
                                               filter_optimizer=optimizer,
                                               feature_extractor=target_model_feature_extractor,
                                               filter_dilation_factors=dilation_factors)

        # Decoder
        decoder_input_layers_channels = {L: layer_channels[L] for L in decoder_input_layers}

        decoder = lwtl_decoder.LWTLDecoder(num_filters, decoder_mdim, decoder_input_layers_channels, use_bn=True)

        # build lwl model
        self.net = LWTLNet(feature_extractor=backbone_net, target_model=target_model, decoder=decoder,
                           label_encoder=label_encoder,
                           target_model_input_layer=target_model_input_layer, decoder_input_layers=decoder_input_layers)
        ############## BUILD NET ###################

        # Loss function
        self.objective = {
            'segm': LovaszSegLoss(per_image=False),
        }
        self.loss_weight = {
            'segm': 100.0
        }

        # actor初始化
        self.num_refinement_iter = 2
        self.disable_backbone_bn = False
        self.disable_all_bn = True
        # Load pre-trained maskrcnn weights
        self._load_pretrained_weights(settings)
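The objective and loss_weight dictionaries defined above are typically reduced to a single scalar loss by the training actor. The repo's actor is not shown here, so the following is only a generic, hedged sketch of that weighting pattern, with nn.BCEWithLogitsLoss standing in for LovaszSegLoss and made-up tensors for the predictions and targets.

import torch
import torch.nn as nn

# Generic sketch of weighted-loss reduction (not the repo's actual actor code).
objective = {'segm': nn.BCEWithLogitsLoss()}
loss_weight = {'segm': 100.0}

pred = {'segm': torch.randn(2, 1, 32, 32)}                  # raw mask logits
gt = {'segm': torch.randint(0, 2, (2, 1, 32, 32)).float()}  # binary target masks

total_loss = sum(loss_weight[k] * objective[k](pred[k], gt[k]) for k in objective)
print(total_loss)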
Example #15
def steepest_descent_learn_filter_resnet50_newiou(
        filter_size=1,
        optim_iter=3,
        optim_init_step=1.0,
        optim_init_reg=0.01,
        output_activation=None,
        classification_layer='layer3',
        backbone_pretrained=False,
        clf_feat_blocks=1,
        clf_feat_norm=True,
        init_filter_norm=False,
        final_conv=False,
        out_feature_dim=256,
        init_gauss_sigma=1.0,
        num_dist_bins=5,
        bin_displacement=1.0,
        test_loss=None,
        mask_init_factor=4.0,
        iou_input_dim=(256, 256),
        iou_inter_dim=(256, 256),
        jitter_sigma_factor=None):
    # backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    norm_scale = math.sqrt(1.0 / (out_feature_dim * filter_size * filter_size))

    # classifier
    clf_feature_extractor = clf_features.residual_bottleneck(
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=norm_scale,
        out_dim=out_feature_dim)
    initializer = clf_initializer.FilterInitializerLinear(
        filter_size=filter_size,
        filter_norm=init_filter_norm,
        feature_dim=out_feature_dim)
    optimizer = clf_optimizer.SteepestDescentLearn(
        num_iter=optim_iter,
        filter_size=filter_size,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        feature_dim=out_feature_dim,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        test_loss=test_loss,
        mask_init_factor=mask_init_factor)
    classifier = target_clf.LinearFilter(
        filter_size=filter_size,
        filter_initializer=initializer,
        filter_optimizer=optimizer,
        feature_extractor=clf_feature_extractor,
        output_activation=output_activation,
        jitter_sigma_factor=jitter_sigma_factor)

    # Bounding box regressor
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4 * 128, 4 * 256),
                                       pred_input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    net = OptimTracker(feature_extractor=backbone_net,
                       classifier=classifier,
                       bb_regressor=bb_regressor,
                       classification_layer=classification_layer,
                       bb_regressor_layer=['layer2', 'layer3'])
    return net
Example #16
File: kysnet.py  Project: Suke0/AlphaRefine
def kysnet_res50(filter_size=4,
                 optim_iter=3,
                 appearance_feature_dim=512,
                 optim_init_step=0.9,
                 optim_init_reg=0.1,
                 classification_layer='layer3',
                 backbone_pretrained=True,
                 clf_feat_blocks=0,
                 clf_feat_norm=True,
                 final_conv=True,
                 init_filter_norm=False,
                 mask_init_factor=3.0,
                 score_act='relu',
                 target_mask_act='sigmoid',
                 num_dist_bins=100,
                 bin_displacement=0.1,
                 detach_length=float('Inf'),
                 train_feature_extractor=True,
                 train_iounet=True,
                 iou_input_dim=(256, 256),
                 iou_inter_dim=(256, 256),
                 cv_kernel_size=3,
                 cv_max_displacement=9,
                 cv_stride=1,
                 init_gauss_sigma=1.0,
                 state_dim=8,
                 representation_predictor_dims=(64, 32),
                 gru_ksz=3,
                 conf_measure='max',
                 dimp_thresh=None):

    # ######################## backbone ########################
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    norm_scale = math.sqrt(
        1.0 / (appearance_feature_dim * filter_size * filter_size))

    # ######################## classifier ########################
    clf_feature_extractor = clf_features.residual_bottleneck(
        num_blocks=clf_feat_blocks,
        l2norm=clf_feat_norm,
        final_conv=final_conv,
        norm_scale=norm_scale,
        out_dim=appearance_feature_dim)

    # Initializer for the DiMP classifier
    initializer = clf_initializer.FilterInitializerLinear(
        filter_size=filter_size,
        filter_norm=init_filter_norm,
        feature_dim=appearance_feature_dim)

    # Optimizer for the DiMP classifier
    optimizer = clf_optimizer.DiMPSteepestDescentGN(
        num_iter=optim_iter,
        feat_stride=16,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        mask_init_factor=mask_init_factor,
        score_act=score_act,
        act_param=None,
        mask_act=target_mask_act,
        detach_length=detach_length)

    # The classifier module
    classifier = target_clf.LinearFilter(
        filter_size=filter_size,
        filter_initializer=initializer,
        filter_optimizer=optimizer,
        feature_extractor=clf_feature_extractor)

    # Bounding box regressor
    bb_regressor = bbmodels.AtomIoUNet(input_dim=(4 * 128, 4 * 256),
                                       pred_input_dim=iou_input_dim,
                                       pred_inter_dim=iou_inter_dim)

    cost_volume_layer = cost_volume.CostVolume(cv_kernel_size,
                                               cv_max_displacement,
                                               stride=cv_stride,
                                               abs_coordinate_output=True)

    motion_response_predictor = resp_pred.ResponsePredictor(
        state_dim=state_dim,
        representation_predictor_dims=representation_predictor_dims,
        gru_ksz=gru_ksz,
        conf_measure=conf_measure,
        dimp_thresh=dimp_thresh)

    response_predictor = predictor_wrappers.PredictorWrapper(
        cost_volume_layer, motion_response_predictor)

    net = KYSNet(backbone_feature_extractor=backbone_net,
                 dimp_classifier=classifier,
                 predictor=response_predictor,
                 bb_regressor=bb_regressor,
                 classification_layer=classification_layer,
                 bb_regressor_layer=['layer2', 'layer3'],
                 train_feature_extractor=train_feature_extractor,
                 train_iounet=train_iounet)
    return net
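The cost_volume.CostVolume(cv_kernel_size, cv_max_displacement, ...) layer above is repo-specific. As a generic illustration of what a cost volume computes (dense local correlation between two feature maps), here is a minimal sketch; the repo's module additionally supports kernel sizes, strides and absolute-coordinate outputs that are not reproduced here.

import torch
import torch.nn.functional as F

# Minimal cost volume sketch: for every location, correlate the feature vector
# in feat1 with the vectors at all displacements within +/- max_disp in feat2.
def cost_volume(feat1, feat2, max_disp=2):
    b, c, h, w = feat1.shape
    padded = F.pad(feat2, [max_disp] * 4)
    vols = []
    for dy in range(2 * max_disp + 1):
        for dx in range(2 * max_disp + 1):
            shifted = padded[:, :, dy:dy + h, dx:dx + w]
            vols.append((feat1 * shifted).sum(dim=1, keepdim=True))
    return torch.cat(vols, dim=1)   # (b, (2*max_disp+1)**2, h, w)

print(cost_volume(torch.randn(1, 8, 16, 16), torch.randn(1, 8, 16, 16)).shape)
# torch.Size([1, 25, 16, 16])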
Example #17
def fcotnet(clf_filter_size=4,
            reg_filter_size=3,
            optim_iter=5,
            optim_init_step=1.0,
            optim_init_reg=0.01,
            classification_layer='layer3',
            feat_stride=16,
            backbone_pretrained=True,
            clf_feat_blocks=0,
            clf_feat_norm=True,
            init_filter_norm=False,
            final_conv=True,
            out_feature_dim=512,
            norm_scale_coef=2,
            init_gauss_sigma=1.0,
            num_dist_bins=5,
            bin_displacement=1.0,
            mask_init_factor=4.0,
            score_act='relu',
            act_param=None,
            target_mask_act='sigmoid',
            detach_length=float('Inf'),
            train_cls_72_and_reg_init=True,
            train_reg_optimizer=False,
            train_cls_18=False):
    # Backbone
    backbone_net = backbones.resnet50(pretrained=backbone_pretrained)

    pyramid_first_conv = FPNUpBlock(res_channels=1024,
                                    planes=256,
                                    smooth_output=False,
                                    first_conv=True)

    up_36 = FPNUpBlock(res_channels=512,
                       planes=256,
                       smooth_output=False,
                       first_conv=False)

    up_72 = FPNUpBlock(res_channels=256,
                       planes=256,
                       smooth_output=True,
                       first_conv=False)

    # classifier_72
    norm_scale_72 = math.sqrt(norm_scale_coef /
                              (256 * clf_filter_size * clf_filter_size))
    clf_head_72 = clf_features.clf_head_72(feature_dim=256,
                                           l2norm=clf_feat_norm,
                                           norm_scale=norm_scale_72,
                                           out_dim=256,
                                           inner_dim=128)
    initializer_72 = clf_initializer.FilterInitializerLinear(
        filter_size=clf_filter_size,
        filter_norm=init_filter_norm,
        feature_dim=256,
        feature_stride=4)
    optimizer_72 = clf_optimizer.DiMPSteepestDescentGN(
        num_iter=optim_iter,
        feat_stride=4,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        mask_init_factor=mask_init_factor,
        score_act=score_act,
        act_param=act_param,
        mask_act=target_mask_act,
        detach_length=detach_length)
    classifier_72 = target_clf.LinearFilter(filter_size=clf_filter_size,
                                            filter_initializer=initializer_72,
                                            filter_optimizer=optimizer_72,
                                            feature_extractor=clf_head_72)

    # classifier_18 (We use the same architecture of classifier_18 with DiMP.)
    norm_scale_18 = math.sqrt(
        1.0 / (out_feature_dim * clf_filter_size * clf_filter_size))
    clf_head_18 = clf_features.clf_head_18(num_blocks=clf_feat_blocks,
                                           l2norm=clf_feat_norm,
                                           final_conv=final_conv,
                                           norm_scale=norm_scale_18,
                                           out_dim=out_feature_dim)
    initializer_18 = clf_initializer.FilterInitializerLinear(
        filter_size=clf_filter_size,
        filter_norm=init_filter_norm,
        feature_dim=out_feature_dim)
    optimizer_18 = clf_optimizer.DiMPSteepestDescentGN(
        num_iter=optim_iter,
        feat_stride=feat_stride,
        init_step_length=optim_init_step,
        init_filter_reg=optim_init_reg,
        init_gauss_sigma=init_gauss_sigma,
        num_dist_bins=num_dist_bins,
        bin_displacement=bin_displacement,
        mask_init_factor=mask_init_factor,
        score_act=score_act,
        act_param=act_param,
        mask_act=target_mask_act,
        detach_length=detach_length)
    classifier_18 = target_clf.LinearFilter(filter_size=clf_filter_size,
                                            filter_initializer=initializer_18,
                                            filter_optimizer=optimizer_18,
                                            feature_extractor=clf_head_18)

    # regressor_72
    reg_optimizer_72 = reg_optimizer.RegSteepestDescentGN(
        num_iter=optim_iter,
        feat_stride=4,
        init_step_length=1.0,
        init_filter_reg=optim_init_reg,
        detach_length=detach_length)
    regressor_72 = RegFilter(
        pool_size=reg_filter_size,
        filter_dim=4,
        filter_channel=256,
        input_features_size=72,
        input_features_channel=256,
        inner_channel=128,
        filter_optimizer=reg_optimizer_72,
        train_reg_optimizer=train_reg_optimizer,
        train_cls_72_and_reg_init=train_cls_72_and_reg_init)

    # FCOT network
    net = FCOTNet(feature_extractor=backbone_net,
                  classification_layer=classification_layer,
                  pyramid_first_conv=pyramid_first_conv,
                  pyramid_36=up_36,
                  pyramid_72=up_72,
                  classifier_18=classifier_18,
                  classifier_72=classifier_72,
                  regressor_72=regressor_72,
                  train_reg_optimizer=train_reg_optimizer,
                  train_cls_18=train_cls_18,
                  train_cls_72_and_reg_init=train_cls_72_and_reg_init)
    return net