def run(settings):
    """Configure and start training of the SiamSel selector network.

    The function fills ``settings`` in place, builds the LaSOT/TrackingNet
    samplers and loaders, wraps ``SiamSelNet`` in a ``SiamSelActor`` with a
    BCE-with-logits objective and runs the trainer for 150 epochs.  Only the
    parameters of ``net.selector`` are handed to the optimizer.

    Notes carried over from the original configuration: larger frame gap,
    no COCO, LaSOT : TrackingNet sampled 1 : 3.

    Args:
        settings: Settings object; populated here and passed to the trainer.
    """
    # --- General settings -------------------------------------------------
    settings.description = 'Siam selection for detection with default settings.'
    settings.print_interval = 1     # how often loss and other info is printed
    settings.batch_size = 1
    assert settings.batch_size==1,"only implement for batch_size 1"
    settings.num_workers = 4        # workers used for image loading

    # Default pytorch ImageNet normalisation values.
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]

    settings.search_area_factor = 5.0                  # patch size relative to target size
    settings.feature_sz = 18                           # size of the feature map
    settings.output_sz = settings.feature_sz * 16      # size of the input image patches

    # --- Image sample / proposal generation settings ----------------------
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }

    # --- Datasets (COCO is intentionally not used in this configuration) --
    lasot_train = Lasot(split='train')
    trackingnet_train = TrackingNet(set_ids=list(range(11)))
    trackingnet_val = TrackingNet(set_ids=list(range(11,12)))

    # --- Processing: one ImageTransform-based pipeline for train and val --
    # (The ATOMProcessing / torchvision pipeline of the ATOM configs is not
    # used here.)
    img_transform = ImageTransform(
        size_divisor=32,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True,
    )
    data_processing = processing.SiamSelProcessing(transform=img_transform)

    # --- Samplers and loaders ---------------------------------------------
    frame_gap = 50*20
    loader_common = dict(
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        drop_last=True,
        stack_dim=1,
    )

    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train],
        [1,3],
        samples_per_epoch=1000*settings.batch_size,
        max_gap=frame_gap,
        processing=data_processing,
    )
    loader_train = LTRLoader('train', dataset_train, training=True,
                             shuffle=True, **loader_common)

    dataset_val = sampler.ATOMSampler(
        [trackingnet_val],
        [1],
        samples_per_epoch=500*settings.batch_size,
        max_gap=frame_gap,
        processing=data_processing,
    )
    loader_val = LTRLoader('val', dataset_val, training=False,
                           shuffle=False, epoch_interval=5, **loader_common)

    # --- Network, objective and actor -------------------------------------
    net = SiamSelNet()
    objective = nn.BCEWithLogitsLoss()
    actor = actors.SiamSelActor(net=net, objective=objective)

    # --- Optimisation: only the selector parameters are optimised ---------
    optimizer = optim.Adam(actor.net.selector.parameters(),
                           lr=1e-4, weight_decay=0.0001)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.2)

    # --- Training (set fail_safe=False if you are debugging) --------------
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)
    trainer.train(150, load_latest=True, fail_safe=True)
def run(settings):
    """Train the ATOM IoUNet (ResNet-18 backbone) with the default settings.

    Populates ``settings``, builds LaSOT / TrackingNet / COCO training data
    and a TrackingNet validation split, and trains for 50 epochs.  Only the
    parameters of ``net.bb_regressor`` are handed to the optimizer.

    Args:
        settings: Settings object; populated here and passed to the trainer.
            ``settings.env`` must provide the dataset directories read below.
    """
    # General settings.
    settings.description = 'ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    env = settings.env

    # Training datasets.
    lasot_train = Lasot(env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(env.trackingnet_dir, set_ids=list(range(11)))
    coco_train = MSCOCOSeq(env.coco_dir)

    # Validation dataset.
    trackingnet_val = TrackingNet(env.trackingnet_dir, set_ids=list(range(11,12)))

    # Joint transform (applied to the image pair jointly) and the per-image
    # transforms for the training and validation sets.
    normalisation = dict(mean=settings.normalize_mean, std=settings.normalize_std)
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))
    transform_train = tfm.Transform(tfm.ToTensorAndJitter(0.2),
                                    tfm.Normalize(**normalisation))
    transform_val = tfm.Transform(tfm.ToTensor(),
                                  tfm.Normalize(**normalisation))

    # Processing for training and validation pairs; they differ only in the
    # per-image transform.
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }
    processing_common = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        joint_transform=transform_joint,
    )
    data_processing_train = processing.ATOMProcessing(transform=transform_train,
                                                      **processing_common)
    data_processing_val = processing.ATOMProcessing(transform=transform_val,
                                                    **processing_common)

    # Training sampler and loader.
    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train],
        [1,1,1],
        samples_per_epoch=1000*settings.batch_size,
        max_gap=50,
        processing=data_processing_train,
    )
    loader_train = LTRLoader(
        'train', dataset_train, training=True,
        batch_size=settings.batch_size, num_workers=settings.num_workers,
        shuffle=True, drop_last=True, stack_dim=1,
    )

    # Validation sampler and loader.
    dataset_val = sampler.ATOMSampler(
        [trackingnet_val],
        [1],
        samples_per_epoch=500*settings.batch_size,
        max_gap=50,
        processing=data_processing_val,
    )
    loader_val = LTRLoader(
        'val', dataset_val, training=False,
        batch_size=settings.batch_size, num_workers=settings.num_workers,
        shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1,
    )

    # Network, objective and the actor wrapping both.
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    objective = nn.MSELoss()
    actor = actors.AtomActor(net=net, objective=objective)

    # Optimizer over the bounding-box regressor parameters, with step decay.
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.2)

    # Trainer; set fail_safe=False if you are debugging.
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)
    trainer.train(50, load_latest=True, fail_safe=True)
def run(settings):
    """Train SiamFC (AlexNet backbone) on ImageNet-VID under Paddle dygraph.

    Populates ``settings``, builds the VID training data and the GOT-10k
    validation data, then creates the network, actor, optimizer and trainer
    inside ``dygraph.guard()`` and trains for 50 epochs.

    Args:
        settings: Settings object; populated here and passed to the trainer.
    """
    # ---- General settings ----
    settings.description = 'SiamFC with Alexnet backbone and trained with vid'
    settings.print_interval = 1   # how often loss and other info is printed
    settings.batch_size = 8
    settings.num_workers = 8      # workers used for image loading
    settings.normalize_mean = [0., 0., 0.]
    settings.normalize_std = [1 / 255., 1 / 255., 1 / 255.]
    settings.search_area_factor = {
        'train': 1.0,
        'test': 2.0078740157480315,
    }  # roughly the same as SiamFC
    settings.output_sz = {'train': 127, 'test': 255}
    settings.scale_type = 'context'
    settings.border_type = 'replicate'

    # ---- Image sample / proposal generation settings ----
    settings.center_jitter_factor = {'train': 0, 'test': 0}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.}

    # ---- Datasets ----
    vid_train = ImagenetVID()
    got10k_val = Got10k(split='val')

    # ---- Transforms ----
    # Joint transform, applied to the pair jointly.
    transform_joint = dltransforms.ToGrayscale(probability=0.25)

    # Per-image transforms; the instance pipeline additionally runs DataAug.
    normalisation = dict(mean=settings.normalize_mean,
                         std=settings.normalize_std)
    transform_exemplar = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(**normalisation),
    ])
    transform_instance = dltransforms.Compose([
        DataAug(),
        dltransforms.ToArray(),
        dltransforms.Normalize(**normalisation),
    ])

    # ---- Processing ----
    processing_common = dict(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        joint_transform=transform_joint,
    )
    # Training pairs: separate transforms for the train and test frames.
    data_processing_train = processing.SiamFCProcessing(
        train_transform=transform_exemplar,
        test_transform=transform_instance,
        **processing_common)
    # Validation pairs: the exemplar transform only.
    data_processing_val = processing.SiamFCProcessing(
        transform=transform_exemplar,
        **processing_common)

    # ---- Samplers and loaders ----
    dataset_train = sampler.ATOMSampler(
        [vid_train],
        [1, ],
        samples_per_epoch=6650 * settings.batch_size,
        max_gap=100,
        processing=data_processing_train)
    train_loader = loader.LTRLoader(
        'train',
        dataset_train,
        training=True,
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        stack_dim=1)

    dataset_val = sampler.ATOMSampler(
        [got10k_val],
        [1, ],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=100,
        processing=data_processing_val)
    val_loader = loader.LTRLoader(
        'val',
        dataset_val,
        training=False,
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        epoch_interval=5,
        stack_dim=1)

    # ---- Create network, actor, optimizer, LR schedule and trainer ----
    with dygraph.guard():
        net = siamfc_alexnet()

        # The actor wraps the network; no external objective is passed.
        actor = actors.SiamFCActor(
            net=net,
            objective=None,
            batch_size=settings.batch_size,
            shape=(17, 17),
            radius=16,
            stride=8)

        # Switch to training mode.
        actor.train()

        # Staircase exponential decay of the learning rate.
        lr_scheduler = fluid.layers.exponential_decay(
            learning_rate=0.01,
            decay_steps=6650,
            decay_rate=0.8685,
            staircase=True)
        regularizer = fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.0005)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            regularization=regularizer,
            parameter_list=net.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(50, load_latest=False, fail_safe=False)
def run(settings):
    """Train the ATOM IoUNet (ResNet-18 backbone) under Paddle dygraph.

    Populates ``settings``, builds ImageNet-VID / LaSOT / COCO training data
    and a GOT-10k validation split, then creates the network, actor,
    optimizer and trainer inside ``dygraph.guard()`` and trains for 40
    epochs.  Only ``net.bb_regressor`` parameters are handed to the
    optimizer.

    Both loaders read ``settings.num_workers`` (set below) instead of a
    separately hard-coded worker count, so the setting is the single source
    of truth.

    Args:
        settings: Settings object; populated here and passed to the trainer.
    """
    # Most common settings are assigned in the settings struct
    settings.description = 'ATOM IoUNet with ResNet18 backbone and trained with vid, lasot, coco.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 64  # Batch size
    settings.num_workers = 4  # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406]  # default ImageNet values
    settings.normalize_std = [0.229, 0.224, 0.225]  # default ImageNet values
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # Settings for the image sample and proposal generation
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }

    # Train datasets
    vid_train = ImagenetVID()
    lasot_train = Lasot(split='train')
    coco_train = MSCOCOSeq()

    # Validation datasets
    got10k_val = Got10k(split='val')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.05)

    # The augmentation transform applied to the training set
    # (individually to each image in the pair)
    transform_train = dltransforms.Compose([
        dltransforms.ToArrayAndJitter(0.2),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std),
    ])

    # The augmentation transform applied to the validation set
    # (individually to each image in the pair)
    transform_val = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std),
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=settings.proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [vid_train, lasot_train, coco_train],
        [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training
    train_loader = loader.LTRLoader(
        'train',
        dataset_train,
        training=True,
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler(
        [got10k_val],
        [1, ],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)

    # The loader for validation
    val_loader = loader.LTRLoader(
        'val',
        dataset_val,
        training=False,
        batch_size=settings.batch_size,
        epoch_interval=5,
        num_workers=settings.num_workers,
        stack_dim=1)

    # Create network, set objective, create optimizer, learning rate
    # scheduler and trainer
    with dygraph.guard():
        # Create network
        net = atom_resnet18(backbone_pretrained=True)

        # Freeze backbone: stop gradients on every feature_extractor entry of
        # the state dict except those whose name contains "running".
        for name, tensor in net.state_dict().items():
            if 'feature_extractor' in name and "running" not in name:
                tensor.stop_gradient = True

        # Set objective
        objective = fluid.layers.square_error_cost

        # Create actor, which wraps network and objective
        actor = actors.AtomActor(net=net, objective=objective)

        # Set to training mode
        actor.train()

        # Define optimizer and learning rate
        gamma = 0.2
        lr = 1e-3
        # NOTE(review): three boundaries are paired with three values and
        # step=1000; the Paddle documentation examples pair N boundaries with
        # N + 1 values -- confirm this schedule is what was intended.
        lr_scheduler = fluid.dygraph.PiecewiseDecay(
            [15, 30, 45],
            values=[lr, lr * gamma, lr * gamma * gamma],
            step=1000,
            begin=0)

        optimizer = fluid.optimizer.Adam(
            parameter_list=net.bb_regressor.parameters(),
            learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(40, load_latest=False, fail_safe=False)
def run(settings):
    """Train the ATOM IoUNet with a ResNet-50 backbone.

    Populates ``settings``, builds LaSOT / TrackingNet / COCO training data
    and a TrackingNet validation split, and trains for 40 epochs.  Only the
    parameters of ``net.bb_regressor`` are handed to the optimizer.

    Args:
        settings: Settings object; populated here and passed to the trainer.
    """
    # ------------------------------------------------------------------
    # General settings
    # ------------------------------------------------------------------
    settings.description = 'ATOM IoUNet with default settings.'
    settings.print_interval = 1                      # print frequency for loss/info
    settings.batch_size = 64
    settings.num_workers = 4                         # image loading workers
    settings.normalize_mean = [0.485, 0.456, 0.406]  # default pytorch ImageNet values
    settings.normalize_std = [0.229, 0.224, 0.225]   # default pytorch ImageNet values
    settings.search_area_factor = 5.0                # patch size relative to target size
    settings.feature_sz = 18                         # feature map size
    settings.output_sz = settings.feature_sz * 16    # input image patch size

    # ------------------------------------------------------------------
    # Image sample and proposal generation
    # ------------------------------------------------------------------
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}
    settings.proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }

    # ------------------------------------------------------------------
    # Datasets
    # ------------------------------------------------------------------
    lasot_train = Lasot(split='train')
    trackingnet_train = TrackingNet(set_ids=list(range(11)))
    coco_train = MSCOCOSeq()
    trackingnet_val = TrackingNet(set_ids=list(range(11, 12)))

    # ------------------------------------------------------------------
    # Transforms: one joint transform for the pair, one per-image transform
    # for each of the training and validation sets.
    # ------------------------------------------------------------------
    tv = torchvision.transforms
    transform_joint = dltransforms.ToGrayscale(probability=0.05)
    transform_train = tv.Compose([
        dltransforms.ToTensorAndJitter(0.2),
        tv.Normalize(mean=settings.normalize_mean, std=settings.normalize_std),
    ])
    transform_val = tv.Compose([
        tv.ToTensor(),
        tv.Normalize(mean=settings.normalize_mean, std=settings.normalize_std),
    ])

    # ------------------------------------------------------------------
    # Processing of training / validation pairs
    # ------------------------------------------------------------------
    def make_processing(per_image_transform):
        # Both pipelines share every argument except the per-image transform.
        return processing.ATOMProcessing(
            search_area_factor=settings.search_area_factor,
            output_sz=settings.output_sz,
            center_jitter_factor=settings.center_jitter_factor,
            scale_jitter_factor=settings.scale_jitter_factor,
            mode='sequence',
            proposal_params=settings.proposal_params,
            transform=per_image_transform,
            joint_transform=transform_joint)

    data_processing_train = make_processing(transform_train)
    data_processing_val = make_processing(transform_val)

    # ------------------------------------------------------------------
    # Samplers and loaders
    # ------------------------------------------------------------------
    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train],
        [1, 1, 1],
        samples_per_epoch=1800 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)
    loader_train = LTRLoader(
        'train',
        dataset_train,
        training=True,
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        shuffle=True,
        drop_last=True,
        stack_dim=1)

    dataset_val = sampler.ATOMSampler(
        [trackingnet_val],
        [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)
    loader_val = LTRLoader(
        'val',
        dataset_val,
        training=False,
        batch_size=settings.batch_size,
        num_workers=settings.num_workers,
        shuffle=False,
        drop_last=True,
        epoch_interval=5,
        stack_dim=1)

    # ------------------------------------------------------------------
    # Network, objective, actor
    # ------------------------------------------------------------------
    net = atom_models.atom_resnet50(backbone_pretrained=True)
    objective = nn.MSELoss()
    actor = actors.AtomActor(net=net, objective=objective)

    # ------------------------------------------------------------------
    # Optimizer and learning-rate schedule
    # ------------------------------------------------------------------
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.2)

    # ------------------------------------------------------------------
    # Training (fail_safe is off in this configuration)
    # ------------------------------------------------------------------
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)
    trainer.train(40, load_latest=True, fail_safe=False)
def run(settings):
    """Distil an ATOM ResNet-18 teacher into a MobileNet-small student.

    Training happens in two stages on the same trainer:

    1. Backbone distillation: gradients for the student's ``bb_regressor``
       are switched off and only ``feature_extractor`` parameters are
       optimised (training up to epoch 50).
    2. Regressor distillation: gradients for ``bb_regressor`` are switched
       back on, those of ``feature_extractor`` are switched off, the
       objective / optimizer / scheduler on the trainer are replaced, and
       training continues up to epoch 100.

    The teacher checkpoint is read from ``settings.teacher_path`` when that
    attribute is present; otherwise the original hard-coded location is used,
    so existing callers behave exactly as before.

    Args:
        settings: Settings object; populated here and passed to the trainer.
            ``settings.env`` must provide the dataset directories read below.
            ``settings.teacher_path`` (optional) overrides the teacher
            checkpoint location.
    """
    # Most common settings are assigned in the settings struct
    settings.description = 'distilled ATOM IoUNet with default settings according to the paper.'
    settings.batch_size = 32
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir,
                                    set_ids=list(range(11)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    trackingnet_val = TrackingNet(settings.env.trackingnet_dir,
                                  set_ids=list(range(11, 12)))

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set
    # (individually to each image in the pair)
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # The augmentation transform applied to the validation set
    # (individually to each image in the pair)
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # Data processing to do on the training pairs
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }
    data_processing_train = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.ATOMProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training
    dataset_train = sampler.ATOMSampler(
        [lasot_train, trackingnet_train, coco_train],
        [1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler(
        [trackingnet_val],
        [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Load teacher network.  The checkpoint location can be overridden via
    # ``settings.teacher_path``; the fallback is the original machine-specific
    # path, kept only for backward compatibility.
    teacher_net = atom_models.atom_resnet18(backbone_pretrained=True)
    teacher_path = getattr(
        settings, 'teacher_path',
        '/home/ddanier/CFKD/pytracking/networks/atom_default.pth')
    teacher_net = loading.load_weights(teacher_net, teacher_path, strict=True)
    print(
        '*******************Teacher net loaded successfully*******************'
    )

    # Create student network and actor
    student_net = atom_models.atom_mobilenetsmall(backbone_pretrained=False)

    ##########################################################
    ### Distil backbone first, turn off grad for regressor ###
    ##########################################################
    for p in student_net.bb_regressor.parameters():
        p.requires_grad_(False)

    objective = distillation.CFKDLoss(
        reg_loss=nn.MSELoss(),
        w_ts=0.,
        w_ah=0.,
        w_cf=0.01,
        w_fd=100.,
        cf_layers=['conv1', 'layer1', 'layer2', 'layer3'])
    actor = actors.AtomCompressionActor(student_net, teacher_net, objective)

    # Optimizer
    optimizer = optim.Adam(actor.student_net.feature_extractor.parameters(),
                           lr=1e-2)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             step_size=15,
                                             gamma=0.1)

    # Create trainer
    trainer = LTRDistillationTrainer(actor, [loader_train, loader_val],
                                     optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(50, load_latest=False, fail_safe=True)

    ########################################################
    ## Distil regressor next, turn off grad for backbone ###
    ########################################################
    for p in trainer.actor.student_net.bb_regressor.parameters():
        p.requires_grad_(True)
    for p in trainer.actor.student_net.feature_extractor.parameters():
        p.requires_grad_(False)

    objective = distillation.CFKDLoss(reg_loss=nn.MSELoss(),
                                      w_ts=1.,
                                      w_ah=0.1,
                                      w_cf=0.,
                                      w_fd=0.)
    trainer.actor.objective = objective

    # Optimizer: a fresh Adam / StepLR pair over the regressor parameters
    trainer.optimizer = optim.Adam(
        trainer.actor.student_net.bb_regressor.parameters(), lr=1e-2)
    trainer.lr_scheduler = optim.lr_scheduler.StepLR(trainer.optimizer,
                                                     step_size=15,
                                                     gamma=0.1)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(100, load_latest=False, fail_safe=True)
def run(settings):
    """Train the DeT variant of ATOM (ResNet-18, 'max' merge) on depth data.

    Populates ``settings``, builds the depth training datasets (COCO-depth,
    LaSOT-depth and three DepthTrack roots) plus the CDTB validation split,
    and trains for 80 epochs.  The optimizer uses three parameter groups:
    ``bb_regressor`` at the base learning rate and both feature extractors
    at ``2e-5``.

    Args:
        settings: Settings object; populated here and passed to the trainer.
            ``settings.env`` must provide the dataset directories read below.
    """
    # General settings.
    # NOTE(review): the description text still mentions GOT10k although the
    # datasets built below are the depth datasets -- kept unchanged here.
    settings.description = 'ATOM IoUNet with default settings, but additionally using GOT10k for training.'
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    env = settings.env
    depth_dtype = 'rgbcolormap'

    # Training datasets.  The RGB-only LaSOT / GOT10k / TrackingNet / COCO
    # datasets of the base ATOM configuration are not used here.
    coco_train = MSCOCOSeq_depth(env.cocodepth_dir, dtype=depth_dtype)
    lasot_depth_train = Lasot_depth(root=env.lasotdepth_dir, dtype=depth_dtype)
    depthtrack_train = DepthTrack(root=env.depthtrack_dir, dtype=depth_dtype)
    depthtrack_horizontal_train = DepthTrack(
        root=env.depthtrack_horizontal_dir, dtype=depth_dtype)
    depthtrack_vertical_train = DepthTrack(
        root=env.depthtrack_vertical_dir, dtype=depth_dtype)

    # Validation dataset (CDTB instead of GOT10k).
    cdtb_val = CDTB(env.cdtb_dir, split='val', dtype=depth_dtype)

    # Joint transform for the pair and per-image transforms for each set.
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean,
                      std=settings.normalize_std))

    # Processing of training / validation pairs.
    proposal_params = {
        'min_iou': 0.1,
        'boxes_per_frame': 16,
        'sigma_factor': [0.01, 0.05, 0.1, 0.2, 0.3],
    }
    processing_common = {
        'search_area_factor': settings.search_area_factor,
        'output_sz': settings.output_sz,
        'center_jitter_factor': settings.center_jitter_factor,
        'scale_jitter_factor': settings.scale_jitter_factor,
        'mode': 'sequence',
        'proposal_params': proposal_params,
        'joint_transform': transform_joint,
    }
    data_processing_train = processing.ATOMProcessing(
        transform=transform_train, **processing_common)
    data_processing_val = processing.ATOMProcessing(
        transform=transform_val, **processing_common)

    # Training sampler and loader; all five datasets are weighted equally.
    train_sets = [
        lasot_depth_train,
        depthtrack_train,
        depthtrack_horizontal_train,
        depthtrack_vertical_train,
        coco_train,
    ]
    dataset_train = sampler.ATOMSampler(
        train_sets,
        [1, 1, 1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=50,
        processing=data_processing_train)
    loader_train = LTRLoader(
        'train', dataset_train, training=True,
        batch_size=settings.batch_size, num_workers=settings.num_workers,
        shuffle=True, drop_last=True, stack_dim=1)

    # Validation sampler and loader.
    dataset_val = sampler.ATOMSampler(
        [cdtb_val],
        [1],
        samples_per_epoch=500 * settings.batch_size,
        max_gap=50,
        processing=data_processing_val)
    loader_val = LTRLoader(
        'val', dataset_val, training=False,
        batch_size=settings.batch_size, num_workers=settings.num_workers,
        shuffle=False, drop_last=True, epoch_interval=5, stack_dim=1)

    # Network and actor.  Other merge types named in the original
    # configuration: 'mean', 'conv', 'weightedSum'.
    net = atom_models.atom_resnet18_DeT(backbone_pretrained=True,
                                        merge_type='max')
    objective = nn.MSELoss()
    actor = actors.AtomActor(net=net, objective=objective)

    # Optimizer with per-group learning rates.  The merge_layer2 /
    # merge_layer3 parameter groups were left disabled in the original
    # configuration and are not added here either.
    backbone_lr = 2e-5
    param_groups = [
        {'params': actor.net.bb_regressor.parameters()},
        {'params': actor.net.feature_extractor.parameters(),
         'lr': backbone_lr},
        {'params': actor.net.feature_extractor_depth.parameters(),
         'lr': backbone_lr},
    ]
    optimizer = optim.Adam(param_groups, lr=1e-3)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15,
                                             gamma=0.2)

    # Trainer; set fail_safe=False if you are debugging.
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer,
                         settings, lr_scheduler)
    trainer.train(80, load_latest=True, fail_safe=True)
def run(settings):
    """Train the ATOM bounding-box regressor with the probabilistic ML objective.

    Writes the common hyper-parameters onto ``settings``, builds the training
    and validation data pipelines, creates the ``atom_resnet18`` network with
    its ``AtomBBKLActor`` and hands everything to an ``LTRTrainer``.

    Args:
        settings: Settings struct. Hyper-parameters are stored on it as
            attributes, and the dataset roots are read from ``settings.env``
            (``lasot_dir``, ``got10k_dir``, ``trackingnet_dir``, ``coco_dir``).

    Returns:
        None. The function ends with the call to ``trainer.train``.
    """
    # Most common settings are assigned in the settings struct.
    # The first fragment ends with a space so that the implicitly concatenated
    # text reads "bounding-box regression" instead of "bounding-boxregression".
    settings.description = ('ATOM using the probabilistic maximum likelihood trained regression model for bounding-box '
                            'regression presented in [https://arxiv.org/abs/1909.12297].')
    settings.batch_size = 64
    settings.num_workers = 8
    settings.print_interval = 1
    settings.normalize_mean = [0.485, 0.456, 0.406]
    settings.normalize_std = [0.229, 0.224, 0.225]
    settings.search_area_factor = 5.0
    settings.feature_sz = 18
    settings.output_sz = settings.feature_sz * 16
    settings.center_jitter_factor = {'train': 0, 'test': 4.5}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.5}

    # Train datasets
    lasot_train = Lasot(settings.env.lasot_dir, split='train')
    got10k_train = Got10k(settings.env.got10k_dir, split='vottrain')
    trackingnet_train = TrackingNet(settings.env.trackingnet_dir, set_ids=list(range(4)))
    coco_train = MSCOCOSeq(settings.env.coco_dir)

    # Validation datasets
    got10k_val = Got10k(settings.env.got10k_dir, split='votval')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = tfm.Transform(tfm.ToGrayscale(probability=0.05))

    # The augmentation transform applied to the training set (individually to each image in the pair)
    transform_train = tfm.Transform(
        tfm.ToTensorAndJitter(0.2),
        tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # The augmentation transform applied to the validation set (individually to each image in the pair)
    transform_val = tfm.Transform(
        tfm.ToTensor(),
        tfm.Normalize(mean=settings.normalize_mean, std=settings.normalize_std))

    # Data processing to do on the training pairs
    proposal_params = {
        'boxes_per_frame': 128,
        'gt_sigma': (0, 0),
        'proposal_sigma': [(0.05, 0.05), (0.5, 0.5)],
        'add_mean_box': True
    }
    data_processing_train = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_train,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    # (same parameters as for training; only the per-image transform differs)
    data_processing_val = processing.KLBBregProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        mode='sequence',
        proposal_params=proposal_params,
        transform=transform_val,
        joint_transform=transform_joint)

    # The sampler for training (the four datasets are given equal weights)
    dataset_train = sampler.ATOMSampler(
        [lasot_train, got10k_train, trackingnet_train, coco_train],
        [1, 1, 1, 1],
        samples_per_epoch=1000 * settings.batch_size,
        max_gap=200,
        processing=data_processing_train)

    # The loader for training
    loader_train = LTRLoader('train',
                             dataset_train,
                             training=True,
                             batch_size=settings.batch_size,
                             num_workers=settings.num_workers,
                             shuffle=True,
                             drop_last=True,
                             stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([got10k_val], [1],
                                      samples_per_epoch=500 * settings.batch_size,
                                      max_gap=200,
                                      processing=data_processing_val)

    # The loader for validation
    loader_val = LTRLoader('val',
                           dataset_val,
                           training=False,
                           batch_size=settings.batch_size,
                           num_workers=settings.num_workers,
                           shuffle=False,
                           drop_last=True,
                           epoch_interval=5,
                           stack_dim=1)

    # Create network and actor
    net = atom_models.atom_resnet18(backbone_pretrained=True)
    objective = klreg_losses.MLRegression()
    actor = bbreg_actors.AtomBBKLActor(net=net, objective=objective)

    # Optimizer: only the parameters of the bb_regressor are handed to Adam
    optimizer = optim.Adam(actor.net.bb_regressor.parameters(), lr=1e-3)
    # StepLR multiplies the learning rate by gamma every step_size scheduler steps
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.2)

    # Create trainer
    trainer = LTRTrainer(actor, [loader_train, loader_val], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    # The first argument is presumably the number of epochs -- confirm against LTRTrainer.
    trainer.train(50, load_latest=True, fail_safe=True)
def run(settings):
    """Configure and start training of the Siam selection network.

    Args:
        settings: Settings struct. The common hyper-parameters below are
            written onto it as attributes before it is passed to ``LTRTrainer``.

    Returns:
        None. The function ends with the call to ``trainer.train``.
    """
    # ---- Common settings, stored on the settings struct ----
    settings.description = 'Siam selection for detection with default settings.'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 1  # Batch size
    assert settings.batch_size == 1, "only implement for batch_size 1"
    settings.num_workers = 4  # Number of workers for image loading
    settings.normalize_mean = [0.485, 0.456, 0.406]  # Normalize mean (default pytorch ImageNet values)
    settings.normalize_std = [0.229, 0.224, 0.225]  # Normalize std (default pytorch ImageNet values)
    settings.search_area_factor = 5.0  # Image patch size relative to target size
    settings.feature_sz = 18  # Size of feature map
    settings.output_sz = settings.feature_sz * 16  # Size of input image patches

    # ---- Settings for the image sample and proposal generation ----
    settings.center_jitter_factor = dict(train=0, test=4.5)
    settings.scale_jitter_factor = dict(train=0, test=0.5)
    settings.proposal_params = dict(min_iou=0.1,
                                    boxes_per_frame=16,
                                    sigma_factor=[0.01, 0.05, 0.1, 0.2, 0.3])

    # ---- Datasets (constructed in the order: train sets, then validation set) ----
    train_sets = [
        Lasot(split='train'),
        TrackingNet(set_ids=list(range(11))),
        MSCOCOSeq(),
    ]
    val_sets = [TrackingNet(set_ids=list(range(11, 12)))]

    # Disabled ATOM-style augmentation / processing (not used by this configuration):
    # # The joint augmentation transform, that is applied to the pairs jointly
    # transform_joint = dltransforms.ToGrayscale(probability=0.05)
    #
    # # The augmentation transform applied to the training set (individually to each image in the pair)
    # transform_train = torchvision.transforms.Compose([dltransforms.ToTensorAndJitter(0.2),
    #                                                   torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])
    #
    # # The augmentation transform applied to the validation set (individually to each image in the pair)
    # transform_val = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
    #                                                 torchvision.transforms.Normalize(mean=settings.normalize_mean, std=settings.normalize_std)])
    #
    # # Data processing to do on the training pairs
    # data_processing_train = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                   output_sz=settings.output_sz,
    #                                                   center_jitter_factor=settings.center_jitter_factor,
    #                                                   scale_jitter_factor=settings.scale_jitter_factor,
    #                                                   mode='sequence',
    #                                                   proposal_params=settings.proposal_params,
    #                                                   transform=transform_train,
    #                                                   joint_transform=transform_joint)
    #
    # # Data processing to do on the validation pairs
    # data_processing_val = processing.ATOMProcessing(search_area_factor=settings.search_area_factor,
    #                                                 output_sz=settings.output_sz,
    #                                                 center_jitter_factor=settings.center_jitter_factor,
    #                                                 scale_jitter_factor=settings.scale_jitter_factor,
    #                                                 mode='sequence',
    #                                                 proposal_params=settings.proposal_params,
    #                                                 transform=transform_val,
    #                                                 joint_transform=transform_joint)

    # ---- Sample processing: one SiamSelProcessing instance shared by both samplers ----
    image_tf = ImageTransform(size_divisor=32,
                              mean=[123.675, 116.28, 103.53],
                              std=[58.395, 57.12, 57.375],
                              to_rgb=True)
    sample_processing = processing.SiamSelProcessing(transform=image_tf)
    sampler_common = {'max_gap': 50, 'processing': sample_processing}

    # Keyword arguments that are the same for the train and validation loaders
    loader_common = {
        'batch_size': settings.batch_size,
        'num_workers': settings.num_workers,
        'drop_last': True,
        'stack_dim': 1,
    }

    # ---- Training sampler and loader ----
    train_samples = sampler.ATOMSampler(train_sets, [1, 1, 1],
                                        samples_per_epoch=1000 * settings.batch_size,
                                        **sampler_common)
    train_loader = LTRLoader('train', train_samples, training=True, shuffle=True, **loader_common)

    # ---- Validation sampler and loader ----
    val_samples = sampler.ATOMSampler(val_sets, [1],
                                      samples_per_epoch=500 * settings.batch_size,
                                      **sampler_common)
    val_loader = LTRLoader('val', val_samples, training=False, shuffle=False, epoch_interval=5,
                           **loader_common)

    # ---- Network, objective and the actor wrapping both ----
    # net = atom_models.atom_resnet18(backbone_pretrained=True)
    model = SiamSelNet()
    criterion = nn.BCEWithLogitsLoss()
    actor = actors.SiamSelActor(net=model, objective=criterion)

    # ---- Optimizer (selector parameters only) and learning rate scheduler ----
    optimizer = optim.Adam(actor.net.selector.parameters(), lr=1e-4)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.2)
    # lr_scheduler = WarmupMultiStepLR(optimizer,[50*1000,80*1000])

    # ---- Trainer ----
    trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer, settings, lr_scheduler)

    # Run training (set fail_safe=False if you are debugging)
    trainer.train(100, load_latest=True, fail_safe=True)


# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
#     def __init__(
#         self,
#         optimizer,
#         milestones,
#         gamma=0.1,
#         warmup_factor=1.0 / 3,
#         warmup_iters=500,
#         warmup_method="linear",
#         last_epoch=-1,
#     ):
#         if not list(milestones) == sorted(milestones):
#             raise ValueError(
#                 "Milestones should be a list of" " increasing integers. Got {}",
#                 milestones,
#             )
#
#         if warmup_method not in ("constant", "linear"):
#             raise ValueError(
#                 "Only 'constant' or 'linear' warmup_method accepted"
#                 "got {}".format(warmup_method)
#             )
#         self.milestones = milestones
#         self.gamma = gamma
#         self.warmup_factor = warmup_factor
#         self.warmup_iters = warmup_iters
#         self.warmup_method = warmup_method
#         super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)
#
#     def get_lr(self):
#         warmup_factor = 1
#         if self.last_epoch < self.warmup_iters:
#             if self.warmup_method == "constant":
#                 warmup_factor = self.warmup_factor
#             elif self.warmup_method == "linear":
#                 alpha = float(self.last_epoch) / self.warmup_iters
#                 warmup_factor = self.warmup_factor * (1 - alpha) + alpha
#         return [
#             base_lr
#             * warmup_factor
#             * self.gamma ** bisect_right(self.milestones, self.last_epoch)
#             for base_lr in self.base_lrs
#         ]