예제 #1
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'Siam selection for detection with default settings.'
    settings.print_interval = 1                                 # How often to print loss and other info
    settings.batch_size = 1                                    # Batch size
    assert settings.batch_size==1,"only implement for batch_size 1"
    settings.num_workers = 4                                    # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406]             # Normalize mean (default pytorch ImageNet values)
    settings.normalize_std = [0.229, 0.224, 0.225]              # Normalize std (default pytorch ImageNet values)
    settings.search_area_factor = 5.0                           # Image patch size relative to target size
    settings.feature_sz = 18                                    # Size of feature map
    settings.output_sz = settings.feature_sz * 16               # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {'min_iou': 0.1, 'boxes_per_frame': 16, 'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]}

    # Train datasets
    lasot_train = Lasot(split='train')
    trackingnet_train = TrackingNet(set_ids=list(range(11)))
    # coco_train = MSCOCOSeq()

    # Validation datasets
    trackingnet_val = TrackingNet(set_ids=list(range(11,12)))

    # # The joint augmentation transform, that is applied to the pairs jointly
    # transform_joint = dltransforms.ToGrayscale(probability=0.05)
    #
    # # The augmentation transform applied to the training set (individually to each image in the pair)
    # transform_train = torchvision.transforms.Compose([dltransforms.ToTensorAndJitter(0.2),
    #                                                   torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])
    #
    # # The augmentation transform applied to the validation set (individually to each image in the pair)
    # transform_val = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
    #                                                 torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])

    # Data processing to do on the training pairs
    # data_processing_train = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                   output_sz=settings.output_sz,
    #                                                   center_jitter_factor=settings.center_jitter_factor,
    #                                                   scale_jitter_factor=settings.scale_jitter_factor,
    #                                                   mode='sequence',
    #                                                   proposal_params=settings.proposal_params,
    #                                                   transform=transform_train,
    #                                                   joint_transform=transform_joint)
    #
    # # Data processing to do on the validation pairs
    # data_processing_val = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                 output_sz=settings.output_sz,
    #                                                 center_jitter_factor=settings.center_jitter_factor,
    #                                                 scale_jitter_factor=settings.scale_jitter_factor,
    #                                                 mode='sequence',
    #                                                 proposal_params=settings.proposal_params,
    #                                                 transform=transform_val,
    #                                                 joint_transform=transform_joint)

    img_transform = ImageTransform(
        size_divisor=32, mean=[123.675, 116.28, 103.53],std=[58.395, 57.12, 57.375],to_rgb=True)
    data_processing=processing.SiamSelProcessing(transform=img_transform)

    # The sampler for training
    # dataset_train = sampler.ATOMSampler([lasot_train, trackingnet_train, coco_train], [1,1,1],
    #                                     samples_per_epoch=1000*settings.batch_size, max_gap=50*20,
    #                                     processing=data_processing)
    dataset_train = sampler.ATOMSampler([lasot_train, trackingnet_train], [1,3],
                                        samples_per_epoch=1000*settings.batch_size, max_gap=50*20,
                                        processing=data_processing)

    # The loader for training
    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([trackingnet_val], [1], samples_per_epoch=500*settings.batch_size, max_gap=50*20,
                                      processing=data_processing)

    # The loader for validation
    loader_val = LTRLoader('val', dataset_val, training=False, batch_size=settings.batch_size, num_workers=settings.num_workers,
                           shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1)

    # Create network
    # net = atom_models.atom_resnet18(backbone_pretrained=True)
    net=SiamSelNet()

    # Set objective
    objective = nn.BCEWithLogitsLoss()

    # Create actor, which wraps network and objective
    actor = actors.SiamSelActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.selector.parameters(), lr=1e-4,weight_decay=0.0001)

    # Learning rate scheduler
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.2)
    # lr_scheduler = WarmupMultiStepLR(optimizer,[50*1000,80*1000])


    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(150, load_latest=True, fail_safe=True)

#larget frame gap
#without coco
#lasot : trackingnet 1:3
예제 #2
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(11)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    trackingnet_val = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(11,12)))

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = tfm.Transform(tfm.ToTensor(),
                                  tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # Data processing to do on the training pairs
    proposal_params = {'min_iou': 0.1, 'boxes_per_frame': 16, 'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]}
    data_processing_train = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
                                                      output_sz=settings.output_sz,
                                                      center_jitter_factor=settings.center_jitter_factor,
                                                      scale_jitter_factor=settings.scale_jitter_factor,
                                                      mode='sequence',
                                                      proposal_params=proposal_params,
                                                      transform=transform_train,
                                                      joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
                                                    output_sz=settings.output_sz,
                                                    center_jitter_factor=settings.center_jitter_factor,
                                                    scale_jitter_factor=settings.scale_jitter_factor,
                                                    mode='sequence',
                                                    proposal_params=proposal_params,
                                                    transform=transform_val,
                                                    joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler([lasot_train, trackingnet_train, coco_train], [1,1,1],
                                samples_per_epoch=1000*settings.batch_size, max_gap=50, processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([trackingnet_val], [1], samples_per_epoch=500*settings.batch_size, max_gap=50,
                              processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val', dataset_val, training=False, batch_size=settings.batch_size, num_workers=settings.num_workers,
                           shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1)

    # Create network and actor
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    objective = nn.MSELoss()
    actor = actors.AtomActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.2)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(50, load_latest=True, fail_safe=True)
예제 #3
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with ResNet18 backbone and trained with vid, lasot, coco.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406
                               ]  # Normalize mean (default ImageNet values)
    settings.normalize_std = [0.229, 0.224,
                              0.225]  # Normalize std (default ImageNet values)
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }

    # Train datasets
    vid_train = ImagenetVID()
    lasot_train = Lasot(split='train')
    coco_train = MSCOCOSeq()

    # Validation datasets
    got10k_val = Got10k(split='val')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = dltransforms.Compose([
        dltransforms.ToArrayAndJitter(0.2),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [vid_train, lasot_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training
    train_loader = loader.LTRLoader('train',
                                    dataset_train,
                                    training=True,
                                    batch_size=settings.batch_size,
                                    num_workers=4,
                                    stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([got10k_val], [
        1,
    ],
                                      samples_per_epoch=500 *
                                      settings.batch_size,
                                      max_gap=50,
                                      processing=data_processing_val)

    # The loader for validation
    val_loader = loader.LTRLoader('val',
                                  dataset_val,
                                  training=False,
                                  batch_size=settings.batch_size,
                                  epoch_interval=5,
                                  num_workers=4,
                                  stack_dim=1)

    # creat network, set objective, creat optimizer, learning rate scheduler, trainer
    with dygraph.guard():
        # Create network
        net = atom_resnet18(backbone_pretrained=True)

        # Freeze backbone
        state_dicts = net.state_dict()
        for k in state_dicts.keys():
            if 'feature_extractor' in k and "running" not in k:
                state_dicts[k].stop_gradient = True

        # Set objective
        objective = fluid.layers.square_error_cost

        # Create actor, which wraps network and objective
        actor = actors.AtomActor(net=net, objective=objective)

        # Set to training mode
        actor.train()

        # define optimizer and learning rate
        gama = 0.2
        lr = 1e-3
        lr_scheduler = fluid.dygraph.PiecewiseDecay(
            [15, 30, 45],
            values=[lr, lr * gama, lr * gama * gama],
            step=1000,
            begin=0)

        optimizer = fluid.optimizer.Adam(
            parameter_list=net.bb_regressor.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(40, load_latest=False, fail_safe=False)
예제 #4
0
def run(settings):
    settings.description = 'Default train settings for FCOT with ResNet50 as backbone.'
    settings.multi_gpu = True
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.output_sigma_factor = 1 / 4
    settings.clf_target_filter_sz = 4
    settings.reg_target_filter_sz = 3
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 3, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0.25, 'test': 0.5}
    settings.hinge_threshold = 0.05
    settings.logging_file = 'fcot_log.txt'

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # Data transform
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    transform_train = torchvision.transforms.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    transform_val = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    # The tracking pairs processing module
    output_sigma = settings.output_sigma_factor / settings.search_area_factor
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 8,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }
    label_params = {
        'feature_sz': settings.feature_sz,
        'sigma_factor': output_sigma,
        'kernel_sz': settings.clf_target_filter_sz
    }
    data_processing_train = processing_fcot.AnchorFreeProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        output_spatial_scale=72 / 288.,
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_train,
        joint_transform=transform_joint)

    data_processing_val = processing_fcot.AnchorFreeProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        output_spatial_scale=72 / 288.,
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # Train sampler and loader
    dataset_train = sampler.FCOTSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [settings.lasot_rate, 1, 1, 1],
        samples_per_epoch=settings.samples_per_epoch,
        max_gap=30,
        num_test_frames=3,
        num_train_frames=3,
        processing=data_processing_train)

    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # Validation samplers and loaders
    dataset_val = sampler.FCOTSampler([got10k_val], [1],
                                      samples_per_epoch=5000,
                                      max_gap=30,
                                      num_test_frames=3,
                                      num_train_frames=3,
                                      processing=data_processing_val)

    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           epoch_interval=5,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           stack_dim=1)

    # Create network
    net = fcotnet.fcotnet(
        clf_filter_size=settings.clf_target_filter_sz,
        reg_filter_size=settings.reg_target_filter_sz,
        backbone_pretrained=True,
        optim_iter=5,
        norm_scale_coef=settings.norm_scale_coef,
        clf_feat_norm=True,
        clf_feat_blocks=0,
        final_conv=True,
        out_feature_dim=512,
        optim_init_step=0.9,
        optim_init_reg=0.1,
        init_gauss_sigma=output_sigma * settings.feature_sz,
        num_dist_bins=100,
        bin_displacement=0.1,
        mask_init_factor=3.0,
        target_mask_act='sigmoid',
        score_act='relu',
        train_reg_optimizer=settings.train_reg_optimizer,
        train_cls_72_and_reg_init=settings.train_cls_72_and_reg_init,
        train_cls_18=settings.train_cls_18)

    # Load dimp-model as initial weights
    device = torch.device('cuda:{}'.format(settings.devices_id[0]) if torch.
                          cuda.is_available() else 'cpu')
    if settings.use_pretrained_dimp:
        assert settings.pretrained_dimp50 is not None
        dimp50 = torch.load(settings.pretrained_dimp50, map_location=device)
        state_dict = collections.OrderedDict()
        for key, v in dimp50['net'].items():
            if key.split('.')[0] == 'feature_extractor':
                state_dict['.'.join(key.split('.')[1:])] = v

        net.feature_extractor.load_state_dict(state_dict)

        state_dict = collections.OrderedDict()
        for key, v in dimp50['net'].items():
            if key.split('.')[0] == 'classifier':
                state_dict['.'.join(key.split('.')[1:])] = v
        net.classifier_18.load_state_dict(state_dict)
        print("loading backbone and Classifier modules from DiMP50 done.")

    # Load fcot-model trained in the previous stage
    if settings.load_model:
        assert settings.fcot_model is not None
        load_dict = torch.load(settings.fcot_model)
        fcot_dict = net.state_dict()
        load_fcotnet_dict = {
            k: v
            for k, v in load_dict['net'].items() if k in fcot_dict
        }
        fcot_dict.update(load_fcotnet_dict)
        net.load_state_dict(fcot_dict)
        print("loading FCOT model done.")

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        net = MultiGPU(net, device_ids=settings.devices_id, dim=1).to(device)

    # Loss for cls_72, cls_18 and regression
    objective = {
        'test_clf_72': ltr_losses.LBHinge(threshold=settings.hinge_threshold),
        'test_clf_18': ltr_losses.LBHinge(threshold=settings.hinge_threshold),
        'reg_72': REGLoss(dim=4)
    }

    # Create actor and adam-optimizer
    if settings.train_cls_72_and_reg_init and settings.train_cls_18:
        ### train regression branch and clssification branches jointly, except for regression optimizer (TODO: fix)
        print("train cls_72, cls_18 and reg_init jointly...")
        loss_weight = {
            'test_clf_72': 100,
            'test_init_clf_72': 100,
            'test_iter_clf_72': 400,
            'test_clf_18': 100,
            'test_init_clf_18': 100,
            'test_iter_clf_18': 400,
            'reg_72': 1
        }
        actor = actors.FcotActor(net=net,
                                 objective=objective,
                                 loss_weight=loss_weight,
                                 device=device)
        optimizer = optim.Adam(
            [{
                'params':
                actor.net.classifier_72.filter_initializer.parameters(),
                'lr': 5e-5
            }, {
                'params':
                actor.net.classifier_72.filter_optimizer.parameters(),
                'lr': 5e-4
            }, {
                'params':
                actor.net.classifier_72.feature_extractor.parameters(),
                'lr': 5e-5
            }, {
                'params':
                actor.net.classifier_18.filter_initializer.parameters(),
                'lr': 5e-5
            }, {
                'params':
                actor.net.classifier_18.filter_optimizer.parameters(),
                'lr': 5e-4
            }, {
                'params':
                actor.net.classifier_18.feature_extractor.parameters(),
                'lr': 5e-5
            }, {
                'params': actor.net.regressor_72.parameters()
            }, {
                'params': actor.net.pyramid_first_conv.parameters()
            }, {
                'params': actor.net.pyramid_36.parameters()
            }, {
                'params': actor.net.pyramid_72.parameters()
            }, {
                'params': actor.net.feature_extractor.parameters(),
                'lr': 2e-5
            }],
            lr=2e-4)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                      milestones=[35, 46, 60],
                                                      gamma=0.2)
    elif settings.train_cls_72_and_reg_init:
        # Setting of the first training stage: train backbone, cls_72 and regression (except for regression optimizer) branch.
        print("train cls_72 and reg_init...")
        loss_weight = {
            'test_clf_72': 100,
            'test_init_clf_72': 10,
            'test_iter_clf_72': 400,
            'test_clf_18': 0,
            'test_init_clf_18': 0,
            'test_iter_clf_18': 0,
            'reg_72': 0.3
        }
        actor = actors.FcotCls72AndRegInitActor(net=net,
                                                objective=objective,
                                                loss_weight=loss_weight,
                                                device=device)
        optimizer = optim.Adam(
            [{
                'params':
                actor.net.classifier_72.filter_initializer.parameters(),
                'lr': 5e-5
            }, {
                'params':
                actor.net.classifier_72.filter_optimizer.parameters(),
                'lr': 5e-4
            }, {
                'params':
                actor.net.classifier_72.feature_extractor.parameters(),
                'lr': 5e-5
            }, {
                'params': actor.net.regressor_72.parameters()
            }, {
                'params': actor.net.pyramid_first_conv.parameters()
            }, {
                'params': actor.net.pyramid_36.parameters()
            }, {
                'params': actor.net.pyramid_72.parameters()
            }, {
                'params': actor.net.feature_extractor.parameters(),
                'lr': 2e-5
            }],
            lr=2e-4)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                      milestones=[35, 45, 69],
                                                      gamma=0.2)
    elif settings.train_reg_optimizer:
        # Setting of the second training stage: train regression optimizer.
        print("train regression optimizer...")
        loss_weight = {
            'test_reg_72': 1,
            'test_init_reg_72': 0,
            'test_iter_reg_72': 1
        }
        actor = actors.FcotOnlineRegressionActor(net=net,
                                                 objective=objective,
                                                 loss_weight=loss_weight,
                                                 device=device)
        optimizer = optim.Adam(
            [{
                'params': actor.net.regressor_72.filter_optimizer.parameters()
            }],
            lr=5e-4)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                      milestones=[2],
                                                      gamma=0.2)
    elif settings.train_cls_18:
        print("train cls_18...")
        # Setting of the third training stage: train cls_18 branch.
        loss_weight = {
            'test_clf_18': 100,
            'test_init_clf_18': 100,
            'test_iter_clf_18': 400
        }
        actor = actors.FcotCls18Actor(net=net,
                                      objective=objective,
                                      loss_weight=loss_weight,
                                      device=device)
        optimizer = optim.Adam(
            [{
                'params':
                actor.net.classifier_18.filter_initializer.parameters(),
                'lr': 5e-5
            }, {
                'params':
                actor.net.classifier_18.filter_optimizer.parameters(),
                'lr': 5e-4
            }, {
                'params':
                actor.net.classifier_18.feature_extractor.parameters(),
                'lr': 5e-5
            }],
            lr=2e-4)
        lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                      milestones=[25],
                                                      gamma=0.2)
    else:
        # TODO: train jointly
        raise Exception("Please run training in correct way.")

    trainer = LTRFcotTrainer(actor, [loader_train, loader_val],
                             optimizer,
                             settings,
                             device,
                             lr_scheduler,
                             logging_file=settings.logging_file)

    trainer.train(settings.total_epochs, load_latest=True, fail_safe=True)
예제 #5
0
def run(settings):
    settings.description = 'Default train settings for DiMP with ResNet18 as backbone.'
    settings.batch_size = 26
    settings.num_workers = 8
    settings.multi_gpu = False
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.output_sigma_factor = 1 / 4
    settings.target_filter_sz = 4
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 3, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0.25, 'test': 0.5}
    settings.hinge_threshold = 0.05
    # settings.print_stats = ['Loss/total', 'Loss/iou', 'ClfTrain/init_loss', 'ClfTrain/test_loss']

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # Data transform
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    transform_train = torchvision.transforms.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    transform_val = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    # The tracking pairs processing module
    output_sigma = settings.output_sigma_factor / settings.search_area_factor
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 8,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }
    label_params = {
        'feature_sz': settings.feature_sz,
        'sigma_factor': output_sigma,
        'kernel_sz': settings.target_filter_sz
    }
    data_processing_train = processing.DiMPProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_train,
        joint_transform=transform_joint)

    data_processing_val = processing.DiMPProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # Train sampler and loader
    dataset_train = sampler.DiMPSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [0.25, 1, 1, 1],
        samples_per_epoch=26000,
        max_gap=30,
        num_test_frames=3,
        num_train_frames=3,
        processing=data_processing_train)

    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # Validation samplers and loaders
    dataset_val = sampler.DiMPSampler([got10k_val], [1],
                                      samples_per_epoch=5000,
                                      max_gap=30,
                                      num_test_frames=3,
                                      num_train_frames=3,
                                      processing=data_processing_val)

    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network and actor
    net = dimpnet.dimpnet18(filter_size=settings.target_filter_sz,
                            backbone_pretrained=True,
                            optim_iter=5,
                            clf_feat_norm=True,
                            final_conv=True,
                            optim_init_step=0.9,
                            optim_init_reg=0.1,
                            init_gauss_sigma=output_sigma *
                            settings.feature_sz,
                            num_dist_bins=100,
                            bin_displacement=0.1,
                            mask_init_factor=3.0,
                            target_mask_act='sigmoid',
                            score_act='relu')

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        net = MultiGPU(net, dim=1)

    objective = {
        'iou': nn.MSELoss(),
        'test_clf': ltr_losses.LBHinge(threshold=settings.hinge_threshold)
    }

    loss_weight = {
        'iou': 1,
        'test_clf': 100,
        'test_init_clf': 100,
        'test_iter_clf': 400
    }

    actor = actors.DiMPActor(net=net,
                             objective=objective,
                             loss_weight=loss_weight)

    # Optimizer
    optimizer = optim.Adam(
        [{
            'params': actor.net.classifier.filter_initializer.parameters(),
            'lr': 5e-5
        }, {
            'params': actor.net.classifier.filter_optimizer.parameters(),
            'lr': 5e-4
        }, {
            'params': actor.net.classifier.feature_extractor.parameters(),
            'lr': 5e-5
        }, {
            'params': actor.net.bb_regressor.parameters(),
            'lr': 1e-3
        }, {
            'params': actor.net.feature_extractor.parameters()
        }],
        lr=2e-4)

    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.2)

    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    trainer.train(50, load_latest=True, fail_safe=True)
예제 #6
0
def run(settings):
    settings.description = 'Default train settings for PrDiMP with ResNet50 as backbone.'
    settings.batch_size = 10
    settings.num_workers = 8
    settings.multi_gpu = False
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.output_sigma_factor = 1/4
    settings.target_filter_sz = 4
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 3, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0.25, 'test': 0.5}
    settings.hinge_threshold = 0.05
    settings.print_stats = ['Loss/total', 'Loss/bb_ce', 'ClfTrain/clf_ce']

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')


    # Data transform
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    transform_val = tfm.Transform(tfm.ToTensor(),
                                  tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # The tracking pairs processing module
    output_sigma = settings.output_sigma_factor / settings.search_area_factor
    proposal_params = {'boxes_per_frame': 128, 'gt_sigma': (0.05, 0.05), 'proposal_sigma': [(0.05, 0.05), (0.5, 0.5)]}
    label_params = {'feature_sz': settings.feature_sz, 'sigma_factor': output_sigma, 'kernel_sz': settings.target_filter_sz}
    label_density_params = {'feature_sz': settings.feature_sz, 'sigma_factor': output_sigma, 'kernel_sz': settings.target_filter_sz, 'normalize': True}

    data_processing_train = processing.KLDiMPProcessing(search_area_factor=settings.search_area_factor,
                                                        output_sz=settings.output_sz,
                                                        center_jitter_factor=settings.center_jitter_factor,
                                                        scale_jitter_factor=settings.scale_jitter_factor,
                                                        mode='sequence',
                                                        proposal_params=proposal_params,
                                                        label_function_params=label_params,
                                                        label_density_params=label_density_params,
                                                        transform=transform_train,
                                                        joint_transform=transform_joint)

    data_processing_val = processing.KLDiMPProcessing(search_area_factor=settings.search_area_factor,
                                                      output_sz=settings.output_sz,
                                                      center_jitter_factor=settings.center_jitter_factor,
                                                      scale_jitter_factor=settings.scale_jitter_factor,
                                                      mode='sequence',
                                                      proposal_params=proposal_params,
                                                      label_function_params=label_params,
                                                      label_density_params=label_density_params,
                                                      transform=transform_val,
                                                      joint_transform=transform_joint)

    # Train sampler and loader
    dataset_train = sampler.DiMPSampler([lasot_train, got10k_train, trackingnet_train, coco_train], [0.25,1,1,1],
                                        samples_per_epoch=26000, max_gap=200, num_test_frames=3, num_train_frames=3,
                                        processing=data_processing_train)

    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=1)

    # Validation samplers and loaders
    dataset_val = sampler.DiMPSampler([got10k_val], [1], samples_per_epoch=5000, max_gap=200,
                                      num_test_frames=3, num_train_frames=3,
                                      processing=data_processing_val)

    loader_val = LTRLoader('val', dataset_val, training=False, batch_size=settings.batch_size, num_workers=settings.num_workers,
                           shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1)

    # Create network and actor
    net = dimpnet.klcedimpnet50(filter_size=settings.target_filter_sz, backbone_pretrained=True, optim_iter=5,
                                clf_feat_norm=True, clf_feat_blocks=0, final_conv=True, out_feature_dim=512,
                                optim_init_step=1.0, optim_init_reg=0.05, optim_min_reg=0.05,
                                gauss_sigma=output_sigma * settings.feature_sz, alpha_eps=0.05, normalize_label=True, init_initializer='zero')

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        net = MultiGPU(net, dim=1)

    objective = {'bb_ce': klreg_losses.KLRegression(), 'clf_ce': klreg_losses.KLRegressionGrid()}

    loss_weight = {'bb_ce': 0.0025, 'clf_ce': 0.25, 'clf_ce_init': 0.25, 'clf_ce_iter': 1.0}

    actor = tracking_actors.KLDiMPActor(net=net, objective=objective, loss_weight=loss_weight)

    # Optimizer
    optimizer = optim.Adam([{'params': actor.net.classifier.parameters(), 'lr': 1e-3},
                            {'params': actor.net.bb_regressor.parameters(), 'lr': 1e-3},
                            {'params': actor.net.feature_extractor.parameters(), 'lr': 2e-5}],
                           lr=2e-4)

    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.2)

    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler)

    trainer.train(50, load_latest=True, fail_safe=True)
예제 #7
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with default settings.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    settings.normalize_mean = [
        0.485, 0.456, 0.406
    ]  # Normalize mean (default pytorch ImageNet values)
    settings.normalize_std = [
        0.229, 0.224, 0.225
    ]  # Normalize std (default pytorch ImageNet values)
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }

    # Train datasets
    lasot_train = Lasot(split='train')
    trackingnet_train = TrackingNet(set_ids=list(range(11)))
    coco_train = MSCOCOSeq()

    # Validation datasets
    trackingnet_val = TrackingNet(set_ids=list(range(11, 12)))

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = torchvision.transforms.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=settings.normalize_mean,
                                         std=settings.normalize_std)
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train], [1, 1, 1],
        samples_per_epoch=1800 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([trackingnet_val], [1],
                                      samples_per_epoch=500 *
                                      settings.batch_size,
                                      max_gap=50,
                                      processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network
    net = atom_models.atom_resnet50(backbone_pretrained=True)
    # Set objective
    objective = nn.MSELoss()

    # Create actor, which wraps network and objective
    actor = actors.AtomActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)

    # Learning rate scheduler
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.2)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(40, load_latest=True, fail_safe=False)
예제 #8
0
파일: kys.py 프로젝트: Suke0/AlphaRefine
def run(settings):
    settings.move_data_to_gpu = False
    settings.description = ''
    settings.batch_size = 10
    settings.test_sequence_length = 50
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.output_sigma_factor = 1 / 4
    settings.target_filter_sz = 4
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_param = {
        'train_mode': 'uniform',
        'train_factor': 3.0,
        'train_limit_motion': False,
        'test_mode': 'uniform',
        'test_factor': 4.5,
        'test_limit_motion': True
    }
    settings.scale_jitter_param = {'train_factor': 0.25, 'test_factor': 0.3}
    settings.hinge_threshold = 0.05
    settings.print_stats = [
        'Loss/total', 'Loss/raw/test_clf', 'Loss/raw/test_clf_acc',
        'Loss/raw/dimp_clf_acc', 'Loss/raw/is_target',
        'Loss/raw/is_target_after_prop', 'Loss/raw/test_seq_acc',
        'Loss/raw/dimp_seq_acc'
    ]

    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=[0, 1, 2, 3, 4])

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # Data transform
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The tracking pairs processing module
    output_sigma = settings.output_sigma_factor / settings.search_area_factor
    proposal_params = None

    label_params = {
        'feature_sz': settings.feature_sz,
        'sigma_factor': output_sigma,
        'kernel_sz': settings.target_filter_sz,
        'end_pad_if_even': True
    }

    data_processing_train = processing.KYSProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_param=settings.center_jitter_param,
        scale_jitter_param=settings.scale_jitter_param,
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_train,
        joint_transform=transform_joint,
        min_crop_inside_ratio=0.1)

    data_processing_val = processing.KYSProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_param=settings.center_jitter_param,
        scale_jitter_param=settings.scale_jitter_param,
        proposal_params=proposal_params,
        label_function_params=label_params,
        transform=transform_val,
        joint_transform=transform_joint,
        min_crop_inside_ratio=0.1)

    # Train sampler and loader
    sequence_sample_info = {
        'num_train_frames': 3,
        'num_test_frames': settings.test_sequence_length,
        'max_train_gap': 30,
        'allow_missing_target': True,
        'min_fraction_valid_frames': 0.5,
        'mode': 'Sequence'
    }

    dataset_train = sampler.KYSSampler(
        [got10k_train, trackingnet_train, lasot_train], [0.3, 0.3, 0.25],
        samples_per_epoch=settings.batch_size * 150,
        sequence_sample_info=sequence_sample_info,
        processing=data_processing_train,
        sample_occluded_sequences=True)

    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # Validation samplers and loaders
    dataset_val = sampler.KYSSampler([got10k_val], [1],
                                     samples_per_epoch=1000,
                                     sequence_sample_info=sequence_sample_info,
                                     processing=data_processing_val,
                                     sample_occluded_sequences=True)

    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # load base dimp
    dimp_weights_path = os.path.join(settings.env.pretrained_networks,
                                     'dimp50.pth')
    base_net, _ = network_loading.load_network(checkpoint=dimp_weights_path)

    net = kysnet_models.kysnet_res50(optim_iter=3,
                                     cv_kernel_size=3,
                                     cv_max_displacement=9,
                                     cv_stride=1,
                                     init_gauss_sigma=output_sigma *
                                     settings.feature_sz,
                                     train_feature_extractor=False,
                                     train_iounet=False,
                                     detach_length=0,
                                     state_dim=8,
                                     representation_predictor_dims=(16, ),
                                     conf_measure='entropy',
                                     dimp_thresh=0.05)

    # Move pre-trained dimp weights
    net.backbone_feature_extractor.load_state_dict(
        base_net.feature_extractor.state_dict())
    net.dimp_classifier.load_state_dict(base_net.classifier.state_dict())
    net.bb_regressor.load_state_dict(base_net.bb_regressor.state_dict())

    # To be safe
    for p in net.backbone_feature_extractor.parameters():
        p.requires_grad_(False)
    for p in net.dimp_classifier.parameters():
        p.requires_grad_(False)
    for p in net.bb_regressor.parameters():
        p.requires_grad_(False)

    objective = {
        'test_clf':
        ltr_losses.LBHingev2(threshold=settings.hinge_threshold,
                             return_per_sequence=False),
        'dimp_clf':
        ltr_losses.LBHingev2(threshold=settings.hinge_threshold,
                             return_per_sequence=False),
        'is_target':
        ltr_losses.IsTargetCellLoss(return_per_sequence=False),
        'clf_acc':
        ltr_losses.TrackingClassificationAccuracy(threshold=0.25)
    }

    loss_weight = {
        'test_clf': 1.0 * 500,
        'test_clf_orig': 50,
        'is_target': 0.1 * 500,
        'is_target_after_prop': 0.1 * 500
    }

    dimp_jitter_fn = DiMPScoreJittering(distractor_ratio=0.1,
                                        p_distractor=0.3,
                                        max_distractor_enhance_factor=1.3,
                                        min_distractor_enhance_factor=0.8)
    actor = actors.KYSActor(net=net,
                            objective=objective,
                            loss_weight=loss_weight,
                            dimp_jitter_fn=dimp_jitter_fn)

    optimizer = optim.Adam([{
        'params': actor.net.predictor.parameters(),
        'lr': 1e-2
    }],
                           lr=1e-2)

    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.2)

    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    trainer.train(40, load_latest=True, fail_safe=True)
예제 #9
0
def run(settings):
    settings.description = 'Transformer-assisted tracker. Our baseline approach is SuperDiMP'
    settings.batch_size = 40
    settings.num_workers = 8
    settings.multi_gpu = True
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 6.0
    settings.output_sigma_factor = 1 / 4
    settings.target_filter_sz = 4
    settings.feature_sz = 22
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 3, 'test': 5.5}
    settings.scale_jitter_factor = {'train': 0.25, 'test': 0.5}
    settings.hinge_threshold = 0.05
    # settings.print_stats = ['Loss/total', 'Loss/iou', 'ClfTrain/init_loss', 'ClfTrain/test_loss']

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # Data transform
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05),
                                    tfm.RandomHorizontalFlip(probability=0.5))

    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2), tfm.RandomHorizontalFlip(probability=0.5),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The tracking pairs processing module
    output_sigma = settings.output_sigma_factor / settings.search_area_factor
    proposal_params = {
        'boxes_per_frame': 128,
        'gt_sigma': (0.05, 0.05),
        'proposal_sigma': [(0.05, 0.05), (0.5, 0.5)]
    }
    label_params = {
        'feature_sz': settings.feature_sz,
        'sigma_factor': output_sigma,
        'kernel_sz': settings.target_filter_sz
    }
    label_density_params = {
        'feature_sz': settings.feature_sz,
        'sigma_factor': output_sigma,
        'kernel_sz': settings.target_filter_sz
    }

    data_processing_train = processing.KLDiMPProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        crop_type='inside_major',
        max_scale_change=1.5,
        mode='sequence',
        proposal_params=proposal_params,
        label_function_params=label_params,
        label_density_params=label_density_params,
        transform=transform_train,
        joint_transform=transform_joint)

    data_processing_val = processing.KLDiMPProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        crop_type='inside_major',
        max_scale_change=1.5,
        mode='sequence',
        proposal_params=proposal_params,
        label_function_params=label_params,
        label_density_params=label_density_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # Train sampler and loader
    dataset_train = sampler.DiMPSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [1, 1, 1, 1],
        samples_per_epoch=50000,
        max_gap=500,
        num_test_frames=3,
        num_train_frames=3,
        processing=data_processing_train)

    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # Validation samplers and loaders
    dataset_val = sampler.DiMPSampler([got10k_val], [1],
                                      samples_per_epoch=10000,
                                      max_gap=500,
                                      num_test_frames=3,
                                      num_train_frames=3,
                                      processing=data_processing_val)

    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network and actor
    net = dimpnet.dimpnet50(
        filter_size=settings.target_filter_sz,
        backbone_pretrained=True,
        optim_iter=5,
        clf_feat_norm=True,
        clf_feat_blocks=0,
        final_conv=True,
        out_feature_dim=512,
        optim_init_step=0.9,
        optim_init_reg=0.1,
        init_gauss_sigma=output_sigma * settings.feature_sz,
        num_dist_bins=100,
        bin_displacement=0.1,
        mask_init_factor=3.0,
        target_mask_act='sigmoid',
        score_act='relu',
        frozen_backbone_layers=['conv1', 'bn1', 'layer1', 'layer2'])

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        net = MultiGPU(net, dim=1)

    objective = {
        'bb_ce': klreg_losses.KLRegression(),
        'test_clf': ltr_losses.LBHinge(threshold=settings.hinge_threshold)
    }

    loss_weight = {
        'bb_ce': 0.01,
        'test_clf': 100,
        'test_init_clf': 100,
        'test_iter_clf': 400
    }

    actor = tracking_actors.KLDiMPActor(net=net,
                                        objective=objective,
                                        loss_weight=loss_weight)

    # Optimizer
    optimizer = optim.Adam(
        [{
            'params': actor.net.classifier.filter_initializer.parameters(),
            'lr': 5e-5
        }, {
            'params': actor.net.classifier.filter_optimizer.parameters(),
            'lr': 5e-4
        }, {
            'params': actor.net.classifier.feature_extractor.parameters(),
            'lr': 5e-5
        }, {
            'params': actor.net.classifier.transformer.parameters(),
            'lr': 1e-3
        }, {
            'params': actor.net.bb_regressor.parameters(),
            'lr': 1e-3
        }, {
            'params': actor.net.feature_extractor.layer3.parameters(),
            'lr': 2e-5
        }],
        lr=2e-4)

    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.2)

    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    trainer.train(50, load_latest=True, fail_safe=True)
예제 #10
0
파일: sep.py 프로젝트: danielism97/CFKD
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'distilled ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 32
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(11)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    trackingnet_val = TrackingNet(settings.env.trackingnet_dir,
                                  set_ids=list(range(11, 12)))

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # Data processing to do on the training pairs
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([trackingnet_val], [1],
                                      samples_per_epoch=500 *
                                      settings.batch_size,
                                      max_gap=50,
                                      processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Load teacher network
    teacher_net = atom_models.atom_resnet18(backbone_pretrained=True)
    teacher_path = '/home/ddanier/CFKD/pytracking/networks/atom_default.pth'
    teacher_net = loading.load_weights(teacher_net, teacher_path, strict=True)
    print(
        '*******************Teacher net loaded successfully*******************'
    )

    # Create student network and actor
    student_net = atom_models.atom_mobilenetsmall(backbone_pretrained=False)

    ##########################################################
    ### Distil backbone first, turn off grad for regressor ###
    ##########################################################
    for p in student_net.bb_regressor.parameters():
        p.requires_grad_(False)

    objective = distillation.CFKDLoss(
        reg_loss=nn.MSELoss(),
        w_ts=0.,
        w_ah=0.,
        w_cf=0.01,
        w_fd=100.,
        cf_layers=['conv1', 'layer1', 'layer2', 'layer3'])
    actor = actors.AtomCompressionActor(student_net, teacher_net, objective)

    # Optimizer
    optimizer = optim.Adam(actor.student_net.feature_extractor.parameters(),
                           lr=1e-2)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.1)

    # Create trainer
    trainer = LTRDistillationTrainer(actor, [loader_train, loader_val],
                                     optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(50, load_latest=False, fail_safe=True)

    ########################################################
    ## Distil regressor next, turn off grad for backbone ###
    ########################################################
    for p in trainer.actor.student_net.bb_regressor.parameters():
        p.requires_grad_(True)
    for p in trainer.actor.student_net.feature_extractor.parameters():
        p.requires_grad_(False)

    objective = distillation.CFKDLoss(reg_loss=nn.MSELoss(),
                                      w_ts=1.,
                                      w_ah=0.1,
                                      w_cf=0.,
                                      w_fd=0.)
    trainer.actor.objective = objective

    # Optimizer
    trainer.optimizer = optim.Adam(
        trainer.actor.student_net.bb_regressor.parameters(), lr=1e-2)

    trainer.lr_scheduler = optim.lr_scheduler.StepLR(trainer.optimizer,
                                                     step_size=15,
                                                     gamma=0.1)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(100, load_latest=False, fail_safe=True)
예제 #11
0
def run(settings):
    # Most common settings are assigned in the settings struct
    debug = False
    if debug:
        settings.batch_size = 4  # 8  # 4  # 120  # 70 # 38
        settings.num_workers = 0  # 24  # 30  # 10  # 35  # 30 min(settings.batch_size, 16)
        settings.multi_gpu = False  # True  # True  # True #  True  # True  # True  # True

    else:
        settings.batch_size = 38  # 8  # 4  # 120  # 70 # 38
        settings.num_workers = 20  # 24  # 30  # 10  # 35  # 30 min(settings.batch_size, 16)
        settings.multi_gpu = True  # True  # True  # True #  True  # True  # True  # True

    settings.device = 'cuda'
    settings.description = 'TransT with default settings.'
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 6.  # 4.0
    settings.template_area_factor = 2.
    settings.search_feature_sz = 32
    settings.template_feature_sz = 16
    settings.search_sz = settings.search_feature_sz * 8
    settings.temp_sz = settings.template_feature_sz * 8
    settings.center_jitter_factor = {'search': 2.0, 'template': 0}  # 3
    settings.scale_jitter_factor = {'search': 0.05, 'template': 0}  # 0.25
    settings.sequence_length = 34  # 30  # 64 NEXT  # Same as PT
    settings.rand = False
    # settings.search_gap = 1  # Depreciated
    settings.init_ckpt = "pytracking/networks/transt.pth"

    # Transformer
    settings.position_embedding = 'sine'
    settings.hidden_dim = 256
    settings.dropout = 0.1
    settings.nheads = 8
    settings.dim_feedforward = 2048
    settings.featurefusion_layers = 4

    # settings.sigma = 1 / 4 / 5.
    # settings.kernel = 4
    # settings.feature = 32  # 18
    # settings.output_sz = 256  # settings.feature * 16
    # settings.end_pad_if_even = False
    # settings.label_function_params = True

    settings.move_data_to_gpu = True

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')  # votval
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(4)))
    # coco_train = MSCOCOSeq(settings.env.coco_dir)

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    # transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
    #                                 tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    # tfm.RandomHorizontalFlip(),
                                    # tfm.RandomAffine(p_flip=0.5, max_scale=1.5),
                                    # tfm.RandomBlur(1),
                                    tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # Data processing to do on the training pairs
    data_processing_train = processing.TransTProcessing(search_area_factor=settings.search_area_factor,
                                                      template_area_factor = settings.template_area_factor,
                                                      search_sz=settings.search_sz,
                                                      temp_sz=settings.temp_sz,
                                                      center_jitter_factor=settings.center_jitter_factor,
                                                      scale_jitter_factor=settings.scale_jitter_factor,
                                                      mode='sequence',
                                                      joint=False,  # Whether or not to apply same transform to every image
                                                      transform=transform_train,
                                                      rand=settings.rand,
                                                      label_function_params=None,  # settings.label_function_params,
                                                      joint_transform=transform_joint)

    # The sampler for training
    # dataset_train = sampler.TransTSampler([got10k_train], [1], samples_per_epoch=1000*settings.batch_size, max_gap=100, processing=data_processing_train, num_search_frames=settings.sequence_length, frame_sample_mode="rnn_causal")
    # dataset_train = sampler.TransTSampler([got10k_train, trackingnet_train], [1, 1], samples_per_epoch=1000*settings.batch_size, max_gap=100, processing=data_processing_train, num_search_frames=settings.sequence_length, frame_sample_mode="interval")

    dataset_train = sampler.TransTSampler([lasot_train, got10k_train, trackingnet_train], [1,1,1], samples_per_epoch=1000*settings.batch_size, max_gap=100, processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=0, pin_memory=settings.move_data_to_gpu == False)

    # Create network and actor
    model = transt_models.transt_resnet50(settings)

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        model = MultiGPU(model, dim=0)

    objective = transt_models.transt_loss(settings)
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    actor = actors.TranstActor(net=model, objective=objective)

    # Optimizer
    # Change learning rate forthe Q we have changed and the RNN and the readout
    #         q = self.mix_q(torch.cat([q, self.mix_norm(exc)], -1))
    #        self.class_embed_new = MLP(hidden_dim * 2, hidden_dim, num_classes + 1, 3)
    #         self.bbox_embed_new = MLP(hidden_dim * 2, hidden_dim, 4, 3)
    param_dicts = [
        {"params": [p for n, p in model.named_parameters() if "backbone" not in n and p.requires_grad]},
        {
            "params": [p for n, p in model.named_parameters() if "backbone" in n and p.requires_grad],
            "lr": 1e-5,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=1e-4,
                                  weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 500)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(1000, load_latest=True, fail_safe=True)
예제 #12
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.device = 'cuda'
    settings.description = 'TransT with default settings.'
    settings.batch_size = 38
    settings.num_workers = 8
    settings.multi_gpu = True
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 4.0
    settings.template_area_factor = 2.0
    settings.search_feature_sz = 32
    settings.template_feature_sz = 16
    settings.search_sz = settings.search_feature_sz * 8
    settings.temp_sz = settings.template_feature_sz * 8
    settings.center_jitter_factor = {'search': 3, 'template': 0}
    settings.scale_jitter_factor = {'search': 0.25, 'template': 0}

    # Transformer
    settings.position_embedding = 'sine'
    settings.hidden_dim = 256
    settings.dropout = 0.1
    settings.nheads = 8
    settings.dim_feedforward = 2048
    settings.featurefusion_layers = 4

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # Data processing to do on the training pairs
    data_processing_train = processing.TransTProcessing(search_area_factor=settings.search_area_factor,
                                                      template_area_factor = settings.template_area_factor,
                                                      search_sz=settings.search_sz,
                                                      temp_sz=settings.temp_sz,
                                                      center_jitter_factor=settings.center_jitter_factor,
                                                      scale_jitter_factor=settings.scale_jitter_factor,
                                                      mode='sequence',
                                                      transform=transform_train,
                                                      joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.TransTSampler([lasot_train, got10k_train, coco_train, trackingnet_train], [1,1,1,1],
                                samples_per_epoch=1000*settings.batch_size, max_gap=100, processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=0)

    # Create network and actor
    model = transt_models.transt_resnet50(settings)

    # Wrap the network for multi GPU training
    if settings.multi_gpu:
        model = MultiGPU(model, dim=0)

    objective = transt_models.transt_loss(settings)
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    actor = actors.TranstActor(net=model, objective=objective)

    # Optimizer
    param_dicts = [
        {"params": [p for n, p in model.named_parameters() if "backbone" not in n and p.requires_grad]},
        {
            "params": [p for n, p in model.named_parameters() if "backbone" in n and p.requires_grad],
            "lr": 1e-5,
        },
    ]
    optimizer = torch.optim.AdamW(param_dicts, lr=1e-4,
                                  weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 500)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(1000, load_latest=True, fail_safe=True)
예제 #13
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'SiamMask_base with ResNet-50 backbone.'
    settings.print_interval = 100  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.samples_per_epoch = 600000  # Number of training pairs per epoch
    settings.num_workers = 4  # Number of workers for image loading
    settings.search_area_factor = {'train': 1.0, 'test': 2.0}
    settings.output_sz = {'train': 127, 'test': 255}
    settings.scale_type = 'context'
    settings.border_type = 'meanpad'

    # Settings for the image sample and label generation
    settings.center_jitter_factor = {'train': 0.125, 'test': 2.0}
    settings.scale_jitter_factor = {'train': 0.05, 'test': 0.18}
    settings.label_params = {
        'search_size': 255,
        'output_size': 25,
        'anchor_stride': 8,
        'anchor_ratios': [0.33, 0.5, 1, 2, 3],
        'anchor_scales': [8],
        'num_pos': 16,
        'num_neg': 16,
        'num_total': 64,
        'thr_high': 0.6,
        'thr_low': 0.3
    }
    settings.loss_weights = {'cls': 1., 'loc': 1.2, 'mask': 36}
    settings.neg = 0.2

    # Train datasets
    vos_train = YoutubeVOS()
    vid_train = ImagenetVID()
    coco_train = MSCOCOSeq()
    det_train = ImagenetDET()
    lasot_train = Lasot(split='train')
    got10k_train = Got10k(split='train')

    # Validation datasets
    vid_val = ImagenetVID()

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.25)

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_exemplar = dltransforms.Transpose()
    transform_instance = dltransforms.Compose([
        dltransforms.Color(probability=1.0),
        dltransforms.Blur(probability=0.18),
        dltransforms.Transpose()
    ])
    transform_instance_mask = dltransforms.Transpose()

    # Data processing to do on the training pairs
    data_processing_train = processing.SiamProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        label_params=settings.label_params,
        train_transform=transform_exemplar,
        test_transform=transform_instance,
        test_mask_transform=transform_instance_mask,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.SiamProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        label_params=settings.label_params,
        transform=transform_exemplar,
        joint_transform=transform_joint)

    nums_per_epoch = settings.samples_per_epoch // settings.batch_size
    # The sampler for training
    dataset_train = sampler.MaskSampler([
        vid_train, coco_train, det_train, vos_train, lasot_train, got10k_train
    ], [2, 1, 1, 2, 1, 1],
                                        samples_per_epoch=nums_per_epoch *
                                        settings.batch_size,
                                        max_gap=100,
                                        processing=data_processing_train,
                                        neg=settings.neg)

    # The loader for training
    train_loader = loader.LTRLoader('train',
                                    dataset_train,
                                    training=True,
                                    batch_size=settings.batch_size,
                                    num_workers=settings.num_workers,
                                    stack_dim=0)

    # The sampler for validation
    dataset_val = sampler.MaskSampler([vid_val], [
        1,
    ],
                                      samples_per_epoch=100 *
                                      settings.batch_size,
                                      max_gap=100,
                                      processing=data_processing_val)

    # The loader for validation
    val_loader = loader.LTRLoader('val',
                                  dataset_val,
                                  training=False,
                                  batch_size=settings.batch_size,
                                  num_workers=settings.num_workers,
                                  stack_dim=0)

    # creat network, set objective, creat optimizer, learning rate scheduler, trainer
    with dygraph.guard():
        # Create network

        def scale_loss(loss):
            total_loss = 0
            for k in settings.loss_weights:
                total_loss += loss[k] * settings.loss_weights[k]
            return total_loss

        net = SiamMask_ResNet50_base(scale_loss=scale_loss)

        # Define objective
        objective = {
            'cls': select_softmax_with_cross_entropy_loss,
            'loc': weight_l1_loss,
            'mask': select_mask_logistic_loss
        }

        # Create actor, which wraps network and objective
        actor = actors.SiamActor(net=net, objective=objective)

        # Set to training mode
        actor.train()

        # Define optimizer and learning rate
        decayed_lr = fluid.layers.exponential_decay(learning_rate=0.005,
                                                    decay_steps=nums_per_epoch,
                                                    decay_rate=0.9642,
                                                    staircase=True)
        lr_scheduler = LinearLrWarmup(learning_rate=decayed_lr,
                                      warmup_steps=5 * nums_per_epoch,
                                      start_lr=0.001,
                                      end_lr=0.005)

        optimizer = fluid.optimizer.Adam(
            parameter_list=net.rpn_head.parameters() + net.neck.parameters() +
            net.mask_head.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(20, load_latest=False, fail_safe=False)
예제 #14
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM using the probabilistic maximum likelihood trained regression model for bounding-box' \
                           'regression presented in [https://arxiv.org/abs/1909.12297].'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # Data processing to do on the training pairs
    proposal_params = {
        'boxes_per_frame': 128,
        'gt_sigma': (0, 0),
        'proposal_sigma': [(0.05, 0.05), (0.5, 0.5)],
        'add_mean_box': True
    }
    data_processing_train = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [1, 1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=200,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([got10k_val], [1],
                                      samples_per_epoch=500 *
                                      settings.batch_size,
                                      max_gap=200,
                                      processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network and actor
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    objective = klreg_losses.MLRegression()
    actor = bbreg_actors.AtomBBKLActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.2)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(50, load_latest=True, fail_safe=True)
예제 #15
0
def run(settings):
    # Most common settings are assigned in the settings struct
    settings.description = 'Siam selection for detection with default settings.'
    settings.print_interval = 1                                 # How often to print loss and other info
    settings.batch_size = 1                                    # Batch size
    assert settings.batch_size==1,"only implement for batch_size 1"
    settings.num_workers = 4                                    # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406]             # Normalize mean (default pytorch ImageNet values)
    settings.normalize_std = [0.229, 0.224, 0.225]              # Normalize std (default pytorch ImageNet values)
    settings.search_area_factor = 5.0                           # Image patch size relative to target size
    settings.feature_sz = 18                                    # Size of feature map
    settings.output_sz = settings.feature_sz * 16               # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {'min_iou': 0.1, 'boxes_per_frame': 16, 'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]}

    # Train datasets
    lasot_train = Lasot(split='train')
    trackingnet_train = TrackingNet(set_ids=list(range(11)))
    coco_train = MSCOCOSeq()

    # Validation datasets
    trackingnet_val = TrackingNet(set_ids=list(range(11,12)))

    # # The joint augmentation transform, that is applied to the pairs jointly
    # transform_joint = dltransforms.ToGrayscale(probability=0.05)
    #
    # # The augmentation transform applied to the training set (individually to each image in the pair)
    # transform_train = torchvision.transforms.Compose([dltransforms.ToTensorAndJitter(0.2),
    #                                                   torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])
    #
    # # The augmentation transform applied to the validation set (individually to each image in the pair)
    # transform_val = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
    #                                                 torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])

    # Data processing to do on the training pairs
    # data_processing_train = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                   output_sz=settings.output_sz,
    #                                                   center_jitter_factor=settings.center_jitter_factor,
    #                                                   scale_jitter_factor=settings.scale_jitter_factor,
    #                                                   mode='sequence',
    #                                                   proposal_params=settings.proposal_params,
    #                                                   transform=transform_train,
    #                                                   joint_transform=transform_joint)
    #
    # # Data processing to do on the validation pairs
    # data_processing_val = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                 output_sz=settings.output_sz,
    #                                                 center_jitter_factor=settings.center_jitter_factor,
    #                                                 scale_jitter_factor=settings.scale_jitter_factor,
    #                                                 mode='sequence',
    #                                                 proposal_params=settings.proposal_params,
    #                                                 transform=transform_val,
    #                                                 joint_transform=transform_joint)

    img_transform = ImageTransform(
        size_divisor=32, mean=[123.675, 116.28, 103.53],std=[58.395, 57.12, 57.375],to_rgb=True)
    data_processing=processing.SiamSelProcessing(transform=img_transform)

    # The sampler for training
    dataset_train = sampler.ATOMSampler([lasot_train, trackingnet_train, coco_train], [1,1,1],
                                        samples_per_epoch=1000*settings.batch_size, max_gap=50,
                                        processing=data_processing)

    # The loader for training
    loader_train = LTRLoader('train', dataset_train, training=True, batch_size=settings.batch_size, num_workers=settings.num_workers,
                             shuffle=True, drop_last=True, stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([trackingnet_val], [1], samples_per_epoch=500*settings.batch_size, max_gap=50,
                                      processing=data_processing)

    # The loader for validation
    loader_val = LTRLoader('val', dataset_val, training=False, batch_size=settings.batch_size, num_workers=settings.num_workers,
                           shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1)

    # Create network
    # net = atom_models.atom_resnet18(backbone_pretrained=True)
    net=SiamSelNet()

    # Set objective
    objective = nn.BCEWithLogitsLoss()

    # Create actor, which wraps network and objective
    actor = actors.SiamSelActor(net=net, objective=objective)

    # Optimizer
    optimizer = optim.Adam(actor.net.selector.parameters(), lr=1e-4)

    # Learning rate scheduler
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.2)
    # lr_scheduler = WarmupMultiStepLR(optimizer,[50*1000,80*1000])


    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(100, load_latest=True, fail_safe=True)


# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
#     def __init__(
#         self,
#         optimizer,
#         milestones,
#         gamma=0.1,
#         warmup_factor=1.0 / 3,
#         warmup_iters=500,
#         warmup_method="linear",
#         last_epoch=-1,
#     ):
#         if not list(milestones) == sorted(milestones):
#             raise ValueError(
#                 "Milestones should be a list of" " increasing integers. Got {}",
#                 milestones,
#             )
#
#         if warmup_method not in ("constant", "linear"):
#             raise ValueError(
#                 "Only 'constant' or 'linear' warmup_method accepted"
#                 "got {}".format(warmup_method)
#             )
#         self.milestones = milestones
#         self.gamma = gamma
#         self.warmup_factor = warmup_factor
#         self.warmup_iters = warmup_iters
#         self.warmup_method = warmup_method
#         super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)
#
#     def get_lr(self):
#         warmup_factor = 1
#         if self.last_epoch < self.warmup_iters:
#             if self.warmup_method == "constant":
#                 warmup_factor = self.warmup_factor
#             elif self.warmup_method == "linear":
#                 alpha = float(self.last_epoch) / self.warmup_iters
#                 warmup_factor = self.warmup_factor * (1 - alpha) + alpha
#         return [
#             base_lr
#             * warmup_factor
#             * self.gamma ** bisect_right(self.milestones, self.last_epoch)
#             for base_lr in self.base_lrs
#         ]