def get_results(net_type, net_base_path, model_name, vis=False, p=None):
    # use the default tracking parameters unless a Config is passed in
    # (avoid a mutable default argument, since p is modified below)
    if p is None:
        p = Config()

    # load all videos
    all_videos = os.listdir(p.seq_base_path)

    p.bbox_output = True
    p.bbox_output_path = os.path.join("./tracking_result/", str(net_type))
    p.visualization = vis

    if p.bbox_output:
        if not os.path.exists(p.bbox_output_path):
            os.makedirs(p.bbox_output_path)

    fps_all = 0.0
    for it, video in enumerate(all_videos):
        p.video = video
        print("Processing %s ... " % p.video)
        print("{} / {}".format(it + 1, len(all_videos)))

        bbox_result, fps = run_tracker(p, net_type, net_base_path, model_name)

        # fps for this video
        print("FPS: %.2f" % fps)

        # save tracking results
        if p.bbox_output:
            np.savetxt(os.path.join(p.bbox_output_path,
                                    p.video.lower() + '_SiamFC.txt'),
                       bbox_result, fmt='%.3f')

        fps_all = fps_all + fps

    avg_fps = fps_all / len(all_videos)
    print("Average FPS: %f" % avg_fps)
def get_specific_video_results(net_type, net_base_path, model_name, video_name,
                               vis=False, save_to_file=False):
    # get the default parameters
    p = Config()
    p.visualization = vis
    p.save_to_file = save_to_file
    p.save_to_video = False
    p.video = video_name
    print("Processing %s ... " % p.video)

    bbox_result, fps = run_tracker(p, net_type, net_base_path, model_name)

    # fps for this video
    print("FPS: %.2f" % fps)
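
# The following is a minimal usage sketch, not part of the original script:
# it shows how the two entry points above might be invoked. The NetType
# value, model directory, checkpoint name, and sequence name ("Basketball")
# are placeholders, not values taken from this repository.
# if __name__ == "__main__":
#     # run the tracker over every sequence under p.seq_base_path
#     get_results(NetType.FINN_W2_A2_X29, "./models_FINN_W2_A2_29/",
#                 "SiamFC_30_model.pth")
#     # re-run a single sequence with visualization enabled
#     get_specific_video_results(NetType.FINN_W2_A2_X29, "./models_FINN_W2_A2_29/",
#                                "SiamFC_30_model.pth", "Basketball", vis=True)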
def train(data_dir, train_imdb, val_imdb, model_save_path="./model/",
          use_gpu=True, type=None, config=None):
    # initialize training configuration; a caller-supplied Config overrides
    # the defaults (the experiment blocks below pass one in)
    if config is None:
        config = Config()

    # do data augmentation in PyTorch;
    # you can also do complex data augmentation as in the original paper
    center_crop_size = config.instance_size - config.stride
    random_crop_size = config.instance_size - 2 * config.stride
    train_z_transforms = transforms.Compose([
        RandomStretch(),
        CenterCrop((config.examplar_size, config.examplar_size)),
        ToTensor()
    ])
    train_x_transforms = transforms.Compose([
        RandomStretch(),
        CenterCrop((center_crop_size, center_crop_size)),
        RandomCrop((random_crop_size, random_crop_size)),
        ToTensor()
    ])
    valid_z_transforms = transforms.Compose([
        CenterCrop((config.examplar_size, config.examplar_size)),
        ToTensor(),
    ])
    valid_x_transforms = transforms.Compose([ToTensor()])

    # load data (see details in VIDDataset.py)
    train_dataset = VIDDataset(train_imdb, data_dir, config,
                               train_z_transforms, train_x_transforms)
    val_dataset = VIDDataset(val_imdb, data_dir, config,
                             valid_z_transforms, valid_x_transforms, "Validation")

    # create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, num_workers=config.train_num_workers,
                              drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size,
                            shuffle=True, num_workers=config.val_num_workers,
                            drop_last=True)

    # create SiamFC network architecture (see details in SiamNet.py)
    net = SiamNet(network_type=type)

    # define training strategy;
    # the learning rate of the adjust layer's weight (i.e., a conv layer)
    # is set to 0 as in the original paper.
    # keyword arguments are used so that weight_decay is not passed into
    # SGD's dampening slot by position
    optimizer = torch.optim.SGD([
        {'params': net.conv_features.parameters()},
        {'params': net.adjust.bias},
        {'params': net.adjust.weight, 'lr': 0.0},
    ], lr=config.lr, momentum=config.momentum, weight_decay=config.weight_decay)

    # move network to GPU if using GPU
    if use_gpu:
        net.cuda()

    # adjust the learning rate in each epoch
    scheduler = StepLR(optimizer, config.step_size, config.gamma)

    # used to control generating labels for training;
    # once generated, they are fixed, since the labels for each
    # pair of images (exemplar z and search region x) are the same
    train_response_flag = False
    valid_response_flag = False

    # ------------------- training & validation process -------------------
    for i in range(config.num_epoch):

        # adjusting learning rate
        scheduler.step()

        # ------------------------------ training ------------------------------
        # switch to training mode (very important for batch normalization)
        net.train()

        # used to collect training loss
        train_loss = []
        for j, data in enumerate(tqdm(train_loader)):

            # fetch data, i.e., B x C x W x H (batch size x channel x width x height)
            exemplar_imgs, instance_imgs = data

            # forward pass
            if use_gpu:
                exemplar_imgs = exemplar_imgs.cuda()
                instance_imgs = instance_imgs.cuda()
            output = net(Variable(exemplar_imgs), Variable(instance_imgs))

            # create label for training (only done once)
            if not train_response_flag:
                # change control flag
                train_response_flag = True
                # get shape of output (i.e., response map)
                response_size = output.shape[2:4]
                # generate label and weight
                train_eltwise_label, train_instance_weight = create_label(
                    response_size, config, use_gpu)

            # clear the gradients
            optimizer.zero_grad()

            # loss
            loss = net.weight_loss(output, train_eltwise_label,
                                   train_instance_weight)

            # backward
            loss.backward()

            # update parameters
            optimizer.step()

            # collect training loss
            train_loss.append(loss.item())

        # ------------------------------ saving model ------------------------------
        if not os.path.exists(model_save_path):
            os.makedirs(model_save_path)
        # torch.save(net, model_save_path + "SiamFC_" + str(i + 1) + "_model.pth")
        torch.save(
            {
                'state_dict': net.state_dict(),
                'optim_dict': optimizer.state_dict(),
                'epoch': i + 1,
            }, model_save_path + "SiamFC_" + str(i + 1) + "_model.pth")

        # ------------------------------ validation ------------------------------
        # switch to evaluation mode
        net.eval()

        # used to collect validation loss
        val_loss = []
        # no gradients are needed for validation
        with torch.no_grad():
            for j, data in enumerate(tqdm(val_loader)):

                exemplar_imgs, instance_imgs = data

                # forward pass
                if use_gpu:
                    exemplar_imgs = exemplar_imgs.cuda()
                    instance_imgs = instance_imgs.cuda()
                output = net(Variable(exemplar_imgs), Variable(instance_imgs))

                # create label for validation (only done once)
                if not valid_response_flag:
                    valid_response_flag = True
                    response_size = output.shape[2:4]
                    valid_eltwise_label, valid_instance_weight = create_label(
                        response_size, config, use_gpu)

                # loss
                loss = net.weight_loss(output, valid_eltwise_label,
                                       valid_instance_weight)

                # collect validation loss
                val_loss.append(loss.item())

        mean_train_loss = np.mean(np.array(train_loss, dtype=np.float32))
        mean_val_loss = np.mean(np.array(val_loss, dtype=np.float32))
        print("Epoch %d training loss: %f, validation loss: %f"
              % (i + 1, mean_train_loss, mean_val_loss))
        writer.add_scalar("Loss/train", mean_train_loss, i + 1)
        writer.add_scalar("Loss/val", mean_val_loss, i + 1)
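
# A minimal sketch, not part of the original script: how a checkpoint written
# by torch.save above could be restored to resume training. load_checkpoint
# is a hypothetical helper; the keys match the dict saved in train().
def load_checkpoint(net, optimizer, checkpoint_path):
    # torch.load restores the dict {'state_dict', 'optim_dict', 'epoch'}
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optim_dict'])
    # return the last completed epoch so training can continue from there
    return checkpoint['epoch']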
def save_config(config, path="./"):
    # make sure the target directory exists before writing
    # (this is called before train() creates the model directory)
    if not os.path.exists(path):
        os.makedirs(path)
    with open(os.path.join(path, "train_config.txt"), 'w') as json_file:
        json.dump(config.__dict__, json_file, indent=4)


if __name__ == "__main__":
    data_dir = "/home/vision/ILSVRC2015/ILSVRC2015_curated/Data/VID/train"
    train_imdb = "/home/vision/siamtrackopt/ILSVRC15-curation/imdb_video_train.json"
    val_imdb = "/home/vision/siamtrackopt/ILSVRC15-curation/imdb_video_val.json"

    exp_nbr = "X1.1"
    writer = SummaryWriter(f"runs/{exp_nbr}")
    save_config(Config(), f"./models_{exp_nbr}/")
    # training SiamFC network, using GPU by default
    train(data_dir, train_imdb, val_imdb,
          model_save_path=f"./models_{exp_nbr}/",
          type=NetType.X1_1)
    writer.close()
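
    # A possible counterpart to save_config, sketched here as a comment and
    # not part of the original script: reload a saved configuration before
    # evaluating a checkpoint. It only assumes the train_config.txt layout
    # written by save_config above.
    # with open(f"./models_{exp_nbr}/train_config.txt") as f:
    #     cfg = Config()
    #     cfg.__dict__.update(json.load(f))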
    # writer = SummaryWriter(f"runs/{exp_nbr}")
    # config_FINN_W2_A2_28 = Config()
    # config_FINN_W2_A2_28.val_videos = 400
    # config_FINN_W2_A2_28.examplar_size = 130
    # config_FINN_W2_A2_28.instance_size = 256
    # config_FINN_W2_A2_28.score_size = 30
    # save_config(config_FINN_W2_A2_28, f"./models_{exp_nbr}/")
    # # training SiamFC network, using GPU by default
    # train(data_dir, train_imdb, val_imdb, model_save_path=f"./models_{exp_nbr}/",
    #       type=NetType.FINN_W2_A2_X28,
    #       config=config_FINN_W2_A2_28)
    # writer.close()

    exp_nbr = "FINN_W2_A2_29" + "_" + str(dt.now())
    writer = SummaryWriter(f"runs/{exp_nbr}")
    config_FINN_W2_A2_29 = Config()
    config_FINN_W2_A2_29.val_videos = 400
    config_FINN_W2_A2_29.examplar_size = 130
    config_FINN_W2_A2_29.instance_size = 256
    config_FINN_W2_A2_29.score_size = 30
    save_config(config_FINN_W2_A2_29, f"./models_{exp_nbr}/")
    # training SiamFC network, using GPU by default
    train(data_dir, train_imdb, val_imdb, model_save_path=f"./models_{exp_nbr}/",
          type=NetType.FINN_W2_A2_X29,
          config=config_FINN_W2_A2_29)
    writer.close()

    exp_nbr = "FINN_W2_A2_30" + "_" + str(dt.now())
# exp_nbr = "FINN_W2_A8_X27_z256_x130" + "_" + str(dt.now()) # writer = SummaryWriter(f"runs/{exp_nbr}") # config_FINN_W2_A8_X27_z256_x130 = Config() # config_FINN_W2_A8_X27_z256_x130.val_videos = 400 # config_FINN_W2_A8_X27_z256_x130.examplar_size = 130 # config_FINN_W2_A8_X27_z256_x130.instance_size = 256 # config_FINN_W2_A8_X27_z256_x130.score_size = 30 # save_config(config_FINN_W2_A8_X27_z256_x130, f"./models_{exp_nbr}/") # # training SiamFC network, using GPU by default # train(data_dir, train_imdb, val_imdb, model_save_path=f"./models_{exp_nbr}/", # type=NetType.FINN_W2_A8_X27_z256_x130, # config=config_FINN_W2_A8_X27_z256_x130) # writer.close() exp_nbr = "FINN_W2_A2_X26_z256_x130" + "_" + str(dt.now()) writer = SummaryWriter(f"runs/{exp_nbr}") config_FINN_W2_A2_X26_z256_x130 = Config() config_FINN_W2_A2_X26_z256_x130.val_videos = 400 config_FINN_W2_A2_X26_z256_x130.examplar_size = 130 config_FINN_W2_A2_X26_z256_x130.instance_size = 256 config_FINN_W2_A2_X26_z256_x130.score_size = 30 save_config(config_FINN_W2_A2_X26_z256_x130, f"./models_{exp_nbr}/") # training SiamFC network, using GPU by default train(data_dir, train_imdb, val_imdb, model_save_path=f"./models_{exp_nbr}/", type=NetType.FINN_W2_A2_X26_z256_x130, config=config_FINN_W2_A2_X26_z256_x130) writer.close()
def __init__(self, network_type=None):
    super(SiamNet, self).__init__()
    self.network_type = network_type

    # all FINN variants below share the same 2-bit INT quantization for
    # weights and activations; they differ only in the channel multiplier
    chan_mults = {
        NetType.FINN_W2_A2_X28: 0.5,
        NetType.FINN_W2_A2_X29: 0.25,
        NetType.FINN_W2_A2_X30: 0.125,
        NetType.FINN_W2_A2_X31: 0.0625,
    }
    if network_type not in chan_mults:
        print("Error: unsupported network type: %s" % network_type)
        exit()

    print("Network type: ")
    print(network_type)
    self.conv_features = create_model(
        first_layer_quant_type=QuantType.INT,
        first_layer_bit_width=2,
        weight_quant_type=QuantType.INT,
        weight_bit_width=2,
        last_layer_quant_type=QuantType.INT,
        last_layer_bit_width=2,
        activation_quant_type=QuantType.INT,
        activation_bit_width=2,
        activation_scaling_impl_type=ScalingImplType.CONST,
        activation_max_val=6,
        chan_mult=chan_mults[network_type])

    # adjust layer, as in the original SiamFC (MatConvNet) implementation
    self.adjust = nn.Conv2d(1, 1, 1, 1)

    # initialize weights
    self._initialize_weight()

    self.config = Config()
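
# A small sketch, not part of the original class: comparing the size of the
# quantized variants via their channel multipliers. count_parameters is a
# hypothetical helper; it only assumes SiamNet and NetType as used above.
def count_parameters(net_type):
    # total number of learnable parameters for a given variant
    net = SiamNet(network_type=net_type)
    return sum(p.numel() for p in net.parameters())

# e.g., X29 uses half the channel width of X28 (chan_mult 0.25 vs. 0.5), so
# count_parameters(NetType.FINN_W2_A2_X29) should report roughly a quarter
# of the conv parameters of the X28 variant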