def initialize(self):
    """Build the SiamFC-AlexNet feature extractor and load its weights.

    Resolves ``self.net_path`` (absolute, or relative to the configured
    network directory), loads the pretrained parameters, and caches the
    layer metadata (strides, dims, normalization constants) used by the
    feature-extraction pipeline.
    """
    with fluid.dygraph.guard():
        # Resolve the checkpoint path: absolute paths are used as-is,
        # relative ones are looked up under the environment network dir.
        if os.path.isabs(self.net_path):
            net_path_full = self.net_path
        else:
            net_path_full = os.path.join(env_settings().network_path,
                                         self.net_path)

        # Construct the network in test mode; weights come from the
        # checkpoint, so no backbone pretraining is needed.
        self.net = siamfc_alexnet(
            backbone_pretrained=False,
            backbone_is_test=True,
            estimator_is_test=True)

        # load_dygraph returns (param_state, optimizer_state); the
        # optimizer state is irrelevant for inference.
        state_dictsm, _ = fluid.load_dygraph(net_path_full)
        self.net.load_dict(state_dictsm)
        # FIX: was self.net.train(). This initializer sets the net up for
        # inference (both is_test flags above are True, and the sibling
        # model_initializer calls .eval()), so evaluation mode is correct.
        self.net.eval()

        self.target_estimator = self.net.target_estimator

        # Static metadata of the backbone's deepest feature layer.
        self.layer_stride = {'conv5': 8}
        self.layer_dim = {'conv5': 256}

        self.estimator_feature_layers = self.net.target_estimator_layer

        # A scalar pool_stride of 1 means "no pooling" for every output
        # layer; expand it to a per-layer list.
        if isinstance(self.pool_stride, int) and self.pool_stride == 1:
            self.pool_stride = [1] * len(self.output_layers)

        # Union of requested output layers and estimator layers, sorted
        # so extraction order is deterministic.
        self.feature_layers = sorted(
            list(set(self.output_layers + self.estimator_feature_layers)))

        # Per-channel normalization in NCHW broadcast shape. Mean 0 and
        # std 1/255 map uint8 pixel values into [0, 255] float space.
        self.mean = np.reshape([0., 0., 0.], [1, -1, 1, 1])
        self.std = np.reshape([1 / 255., 1 / 255., 1 / 255.], [1, -1, 1, 1])
def model_initializer(self):
    """Create the SiamFC-AlexNet model and load pretrained weights.

    The checkpoint path comes from ``self.params.net_path``, falling back
    to the first feature extractor's path. Weights are copied tensor by
    tensor so that any shape mismatch is reported explicitly instead of
    silently loading a broken model.

    Raises:
        Exception: if the checkpoint file does not exist, if the number
            of checkpoint tensors does not match the model, or if any
            tensor shape mismatches.
    """
    import os
    net_path = self.params.net_path
    if net_path is None:
        net_path = self.params.features.features[0].net_path
    if not os.path.exists(net_path):
        raise Exception("not found {}".format(net_path))
    with dygraph.guard():
        self.model = siamfc_alexnet(backbone_is_test=True)
        # load_dygraph returns (param_state, optimizer_state); only the
        # parameters are needed here.
        weight_params, opt_params = fluid.load_dygraph(net_path)
        state_dict = self.model.state_dict()
        # The copy below pairs keys positionally; if the counts differ,
        # zip() would silently truncate and leave some parameters at
        # their random initialization — fail loudly instead.
        if len(state_dict) != len(weight_params):
            raise Exception(
                "ERROR, param count mismatch: model has {} tensors, "
                "checkpoint has {}".format(
                    len(state_dict), len(weight_params)))
        # NOTE(review): pairing relies on both dicts having the same key
        # order; shapes are verified per tensor as a safety net.
        for k1, k2 in zip(state_dict.keys(), weight_params.keys()):
            if list(state_dict[k1].shape) == list(weight_params[k2].shape):
                state_dict[k1].set_value(weight_params[k2])
            else:
                raise Exception(
                    "ERROR, shape not match: model[{}]={} vs "
                    "checkpoint[{}]={}".format(
                        k1, list(state_dict[k1].shape),
                        k2, list(weight_params[k2].shape)))
        self.model.load_dict(state_dict)
        # Inference only: switch off training-mode behavior.
        self.model.eval()
def run(settings):
    """Train SiamFC (AlexNet backbone) on ImageNet-VID.

    Fills in the training ``settings``, builds the train/val datasets,
    transforms, samplers and loaders, then constructs the network, actor,
    optimizer and LR schedule, and runs ``LTRTrainer`` for 50 epochs.
    """
    # Most common settings are assigned in the settings struct
    settings.description = 'SiamFC with Alexnet backbone and trained with vid'
    settings.print_interval = 1  # How often to print loss and other info
    settings.batch_size = 8  # Batch size
    settings.num_workers = 8  # Number of workers for image loading
    settings.normalize_mean = [0., 0., 0.]  # Normalize mean
    settings.normalize_std = [1 / 255., 1 / 255., 1 / 255.]  # Normalize std
    settings.search_area_factor = {
        'train': 1.0,
        'test': 2.0078740157480315
    }  # roughly the same as SiamFC (255/127 context ratio)
    # Exemplar / search-region crop sizes (SiamFC convention: 127 / 255).
    settings.output_sz = {'train': 127, 'test': 255}
    settings.scale_type = 'context'
    settings.border_type = 'replicate'

    # Settings for the image sample and proposal generation.
    # SiamFC uses no jitter on the exemplar; small scale jitter on search.
    settings.center_jitter_factor = {'train': 0, 'test': 0}
    settings.scale_jitter_factor = {'train': 0, 'test': 0.}

    # Train datasets
    vid_train = ImagenetVID()

    # Validation datasets
    got10k_val = Got10k(split='val')

    # The joint augmentation transform, that is applied to the pairs jointly
    transform_joint = dltransforms.ToGrayscale(probability=0.25)

    # The augmentation transform applied to the training set
    # (individually to each image in the pair)
    transform_exemplar = dltransforms.Compose([
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])
    # Search-region images additionally get DataAug augmentation.
    transform_instance = dltransforms.Compose([
        DataAug(),
        dltransforms.ToArray(),
        dltransforms.Normalize(mean=settings.normalize_mean,
                               std=settings.normalize_std)
    ])

    # Data processing to do on the training pairs
    data_processing_train = processing.SiamFCProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        train_transform=transform_exemplar,
        test_transform=transform_instance,
        joint_transform=transform_joint)

    # Data processing to do on the validation pairs
    data_processing_val = processing.SiamFCProcessing(
        search_area_factor=settings.search_area_factor,
        output_sz=settings.output_sz,
        center_jitter_factor=settings.center_jitter_factor,
        scale_jitter_factor=settings.scale_jitter_factor,
        scale_type=settings.scale_type,
        border_type=settings.border_type,
        mode='sequence',
        transform=transform_exemplar,
        joint_transform=transform_joint)

    # The sampler for training: 6650 iterations per epoch, frame pairs
    # drawn at most 100 frames apart.
    dataset_train = sampler.ATOMSampler([vid_train], [
        1,
    ],
                                        samples_per_epoch=6650 *
                                        settings.batch_size,
                                        max_gap=100,
                                        processing=data_processing_train)

    # The loader for training
    train_loader = loader.LTRLoader('train',
                                    dataset_train,
                                    training=True,
                                    batch_size=settings.batch_size,
                                    num_workers=settings.num_workers,
                                    stack_dim=1)

    # The sampler for validation
    dataset_val = sampler.ATOMSampler([got10k_val], [
        1,
    ],
                                      samples_per_epoch=1000 *
                                      settings.batch_size,
                                      max_gap=100,
                                      processing=data_processing_val)

    # The loader for validation; runs only every 5th epoch.
    val_loader = loader.LTRLoader('val',
                                  dataset_val,
                                  training=False,
                                  batch_size=settings.batch_size,
                                  num_workers=settings.num_workers,
                                  epoch_interval=5,
                                  stack_dim=1)

    # creat network, set objective, creat optimizer, learning rate
    # scheduler, trainer
    with dygraph.guard():
        # Create network
        net = siamfc_alexnet()

        # Create actor, which wraps network and objective.
        # shape/radius/stride define the 17x17 response map label geometry.
        actor = actors.SiamFCActor(net=net,
                                   objective=None,
                                   batch_size=settings.batch_size,
                                   shape=(17, 17),
                                   radius=16,
                                   stride=8)

        # Set to training mode
        actor.train()

        # define optimizer and learning rate: exponential decay stepped
        # once per epoch (decay_steps matches samples_per_epoch iters).
        lr_scheduler = fluid.layers.exponential_decay(learning_rate=0.01,
                                                      decay_steps=6650,
                                                      decay_rate=0.8685,
                                                      staircase=True)
        regularizer = fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=0.0005)
        optimizer = fluid.optimizer.Momentum(momentum=0.9,
                                             regularization=regularizer,
                                             parameter_list=net.parameters(),
                                             learning_rate=lr_scheduler)

        trainer = LTRTrainer(actor, [train_loader, val_loader], optimizer,
                             settings, lr_scheduler)
        trainer.train(50, load_latest=False, fail_safe=False)