Beispiel #1
0
 def _init_transform(self,
                     transform=transforms.ToArray(),
                     train_transform=None,
                     test_transform=None,
                     train_mask_transform=None,
                     test_mask_transform=None,
                     joint_transform=None):
     """
     Populate self.transform with the image and mask transforms of each split.

     args:
         transform            - Fallback transformations, used for a split whose own transform is None.
         train_transform      - Transformations for the train images. If None, 'transform' is used.
         test_transform       - Transformations for the test images. If None, 'transform' is used.
         train_mask_transform - Transformations stored under 'train_mask'. If None, the entry stored
                                  under 'train' is reused.
         test_mask_transform  - Transformations stored under 'test_mask'. If None, the entry stored
                                  under 'test' is reused.
         joint_transform      - Transformations stored under 'joint'.
     """
     # Resolve the per-split image transforms, falling back to 'transform'.
     train_image_tf = train_transform
     if train_image_tf is None:
         train_image_tf = transform
     test_image_tf = test_transform
     if test_image_tf is None:
         test_image_tf = transform

     self.transform = dict(train=train_image_tf,
                           test=test_image_tf,
                           joint=joint_transform)

     # Delegate to the parent initializer with the caller's original arguments.
     # NOTE(review): the parent is not visible here; it may rebuild
     # self.transform, which is why the mask entries are only added afterwards.
     super().__init__(transform=transform,
                      train_transform=train_transform,
                      test_transform=test_transform,
                      joint_transform=joint_transform)

     # Mask transforms default to whatever is currently stored for the
     # corresponding image split.
     mask_specs = (
         ('train_mask', 'train', train_mask_transform),
         ('test_mask', 'test', test_mask_transform),
     )
     for mask_key, image_key, override in mask_specs:
         if override is None:
             self.transform[mask_key] = self.transform[image_key]
         else:
             self.transform[mask_key] = override
Beispiel #2
0
 def __init__(self,
              transform=transforms.ToArray(),
              train_transform=None,
              test_transform=None,
              joint_transform=None):
     """
     Store the transformations for each split in the self.transform dict.

     args:
         transform       - Fallback set of transformations for the images. It is only consulted for a split
                             whose dedicated transform (train_transform / test_transform) is None.
         train_transform - Set of transformations stored under 'train'. When None, 'transform' is stored
                             instead.
         test_transform  - Set of transformations stored under 'test'. When None, 'transform' is stored
                             instead.
         joint_transform - Set of transformations stored under 'joint', meant to be applied 'jointly' on the
                             train and test images (e.g. converting both to grayscale).
     """
     # Fall back to the shared transform for any split without its own.
     if train_transform is None:
         train_transform = transform
     if test_transform is None:
         test_transform = transform

     self.transform = dict(train=train_transform,
                           test=test_transform,
                           joint=joint_transform)
Beispiel #3
0
def run(settings):
    """
    Configure and launch training of ATOM IoUNet with a ResNet18 backbone.

    Writes the experiment hyper-parameters onto 'settings', builds the training
    pipeline (ImagenetVID, Lasot 'train' split, MSCOCOSeq) and the validation
    pipeline (Got10k 'val' split), creates the network, actor, learning rate
    scheduler and optimizer, and finally calls trainer.train(40, ...).

    args:
        settings - The settings struct. The attributes assigned below are written onto it, and it is
                    passed on to LTRTrainer.
    """
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with ResNet18 backbone and trained with vid, lasot, coco.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406]  # Normalize mean (default ImageNet values)
    settings.normalize_std = [0.229, 0.224, 0.225]  # Normalize std (default ImageNet values)
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3]
    }

    # Train datasets
    vid_train = ImagenetVID()
    lasot_train = Lasot(split='train')
    coco_train = MSCOCOSeq()

    # Validation datasets
    got10k_val = Got10k(split='val')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = dltransforms.Compose([
        dltransforms.ToArrayAndJitter(0.2),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [vid_train, lasot_train, coco_train], [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training. The worker count is read from the settings
    # struct (assigned above) rather than repeated as a literal, so the
    # setting is the single place to change it.
    train_loader = loader.LTRLoader('train',
                                    dataset_train,
                                    training=True,
                                    batch_size=settings.batch_size,
                                    num_workers=settings.num_workers,
                                    stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler(
        [got10k_val], [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)

    # The loader for validation
    val_loader = loader.LTRLoader('val',
                                  dataset_val,
                                  training=False,
                                  batch_size=settings.batch_size,
                                  epoch_interval=5,
                                  num_workers=settings.num_workers,
                                  stack_dim=1)

    # Create network, set objective, create optimizer, learning rate scheduler, trainer
    with dygraph.guard():
        # Create network
        net = atom_resnet18(backbone_pretrained=True)

        # Freeze backbone: mark every state-dict entry whose key contains
        # 'feature_extractor' (except the "running" entries) as stop_gradient.
        state_dicts = net.state_dict()
        for k in state_dicts.keys():
            if 'feature_extractor' in k and "running" not in k:
                state_dicts[k].stop_gradient = True

        # Set objective
        objective = fluid.layers.square_error_cost

        # Create actor, which wraps network and objective
        actor = actors.AtomActor(net=net, objective=objective)

        # Set to training mode
        actor.train()

        # Define optimizer and learning rate
        # NOTE(review): three boundaries are paired with three values and
        # step=1000; confirm against the PiecewiseDecay documentation and the
        # trainer's use of the scheduler that this yields the intended schedule.
        gamma = 0.2
        lr = 1e-3
        lr_scheduler = fluid.dygraph.PiecewiseDecay(
            [15, 30, 45],
            values=[lr, lr * gamma, lr * gamma * gamma],
            step=1000,
            begin=0)

        # Only the parameters of net.bb_regressor are handed to the optimizer
        optimizer = fluid.optimizer.Adam(
            parameter_list=net.bb_regressor.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(40, load_latest=False, fail_safe=False)
def run(settings):
    """
    Configure and launch training of SiamFC with an Alexnet backbone.

    Writes the experiment hyper-parameters onto 'settings', builds the training
    pipeline (ImagenetVID) and the validation pipeline (Got10k 'val' split),
    creates the network, actor, learning rate schedule and optimizer, and
    finally calls trainer.train(50, ...).

    args:
        settings - The settings struct. The attributes assigned below are written onto it, and it is
                    passed on to LTRTrainer.
    """
    # --- Experiment-wide values stored on the settings struct ---
    settings.description = 'SiamFC with Alexnet backbone and trained with vid'
    settings.print_interval = 1  # Interval for printing loss and other info
    settings.batch_size = 8
    settings.num_workers = 8  # Image-loading workers
    settings.normalize_mean = [0.] * 3
    settings.normalize_std = [1 / 255.] * 3
    # Roughly the same as SiamFC
    settings.search_area_factor = dict(train=1.0, test=2.0078740157480315)
    settings.output_sz = dict(train=127, test=255)
    settings.scale_type = 'context'
    settings.border_type = 'replicate'

    # --- Image sample / proposal generation ---
    settings.center_jitter_factor = dict(train=0, test=0)
    settings.scale_jitter_factor = dict(train=0, test=0.)

    # --- Datasets: one for training, one for validation ---
    vid_train = ImagenetVID()
    got10k_val = Got10k(split='val')

    # --- Transforms ---
    # Applied jointly to the image pairs
    grayscale_joint = dltransforms.ToGrayscale(probability=0.25)

    def build_normalize():
        # A new Normalize instance per pipeline, configured from the settings
        return dltransforms.Normalize(mean=settings.normalize_mean,
                                      std=settings.normalize_std)

    # Applied individually to each image of a pair
    exemplar_pipeline = dltransforms.Compose(
        [dltransforms.ToArray(), build_normalize()])
    instance_pipeline = dltransforms.Compose(
        [DataAug(), dltransforms.ToArray(), build_normalize()])

    # --- Data processing for the training and validation pairs ---
    processing_common = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        joint_transform=grayscale_joint)
    train_processing = processing.SiamFCProcessing(
        train_transform=exemplar_pipeline,
        test_transform=instance_pipeline,
        **processing_common)
    val_processing = processing.SiamFCProcessing(
        transform=exemplar_pipeline,
        **processing_common)

    # --- Samplers and loaders ---
    loader_common = dict(batch_size=settings.batch_size,
                         num_workers=settings.num_workers,
                         stack_dim=1)

    train_samples = 6650 * settings.batch_size
    train_sampler = sampler.ATOMSampler([vid_train], [1],
                                        samples_per_epoch=train_samples,
                                        max_gap=100,
                                        processing=train_processing)
    train_loader = loader.LTRLoader('train',
                                    train_sampler,
                                    training=True,
                                    **loader_common)

    val_samples = 1000 * settings.batch_size
    val_sampler = sampler.ATOMSampler([got10k_val], [1],
                                      samples_per_epoch=val_samples,
                                      max_gap=100,
                                      processing=val_processing)
    val_loader = loader.LTRLoader('val',
                                  val_sampler,
                                  training=False,
                                  epoch_interval=5,
                                  **loader_common)

    # --- Network, actor, optimizer, learning rate schedule, trainer ---
    with dygraph.guard():
        net = siamfc_alexnet()

        # The actor wraps the network; no separate objective is passed
        actor = actors.SiamFCActor(net=net,
                                   objective=None,
                                   batch_size=settings.batch_size,
                                   shape=(17, 17),
                                   radius=16,
                                   stride=8)
        actor.train()  # Switch to training mode

        # Staircase exponential decay of the learning rate
        lr_scheduler = fluid.layers.exponential_decay(learning_rate=0.01,
                                                      decay_steps=6650,
                                                      decay_rate=0.8685,
                                                      staircase=True)
        # Momentum optimizer over all network parameters with L2 regularization
        l2_decay = fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.0005)
        optimizer = fluid.optimizer.Momentum(momentum=0.9,
                                             regularization=l2_decay,
                                             parameter_list=net.parameters(),
                                             learning_rate=lr_scheduler)

        loaders = [train_loader, val_loader]
        trainer = LTRTrainer(actor, loaders, optimizer, settings,
                             lr_scheduler)
        trainer.train(50, load_latest=False, fail_safe=False)