test_loader = DataLoader(test_set, batch_size=1, shuffle=False) print("Initializing model") # initializing the model model = MOVEModel(emb_size=args.emb_size) # loading a pre-trained model model_name = os.path.join(args.main_path, 'saved_models', '{}_models'.format(args.exp_type), 'model_{}.pt'.format(experiment_name)) model.load_state_dict(torch.load(model_name, map_location='cpu')) # sending the model to gpu, if available device = 'cuda:0' if torch.cuda.is_available() else 'cpu' model.to(device) remove_items = [] with torch.no_grad(): # disabling gradient tracking model.eval() # setting the model to evaluation mode # initializing an empty tensor for storing the embeddings embed_all = torch.tensor([], device=device) # iterating through the data loader for batch_idx, item in tqdm(enumerate(test_loader)): try: # sending the items to the proper device item = handle_device(item, device)
class KDTrainer(BaseTrainer):
    """
    Trainer for the Knowledge Distillation (KD) experiments.

    The student model is optimized with the sum of a KD loss, selected through
    cfg['kd_loss'], and a triplet loss.
    """

    def __init__(self, cfg, experiment_name):
        """
        Set up the trainer by delegating everything to BaseTrainer.
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Run the training loop for a single mini-epoch.
        :return: mean of the per-batch loss values of this mini-epoch
        """
        # the student is optimized, so it has to be in training mode
        self.model.train()

        # one loss value is collected per iteration
        batch_losses = []

        # batch that is replayed when over-fitting on a single batch
        pinned_batch = None

        for step, batch in enumerate(self.data_loader):
            # with overfit_batch == 1 the very first batch is reused at every
            # step; a quick over-fit is the expected outcome and its absence
            # hints at an optimization problem
            if self.cfg['overfit_batch'] == 1:
                if step == 0:
                    pinned_batch = batch
                batch = pinned_batch

            # moving the data and the labels to the training device
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # student forward pass: one embedding per item of the batch
            embs_s = self.model(items)

            # teacher embeddings are only needed for the distance-based KD
            # loss; gradients are never tracked for the teacher
            with torch.no_grad():
                if self.cfg['kd_loss'] == 'distance':
                    embs_t = self.teacher(items)
                else:
                    embs_t = None

            # KD loss of the current iteration
            kd_loss_fn = KD_LOSS_DICT[self.cfg['kd_loss']]
            kd_loss = kd_loss_fn(embs_s=embs_s,
                                 embs_t=embs_t,
                                 emb_size=self.cfg['emb_size'],
                                 lp_layer=self.lp_layer,
                                 labels=labels,
                                 centroids=self.centroids)

            # triplet loss of the current iteration
            main_loss = triplet_loss(
                data=embs_s,
                labels=labels,
                emb_size=self.cfg['emb_size'],
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # the optimized objective is the sum of both terms
            loss = kd_loss + main_loss

            # usual optimization step: reset gradients, backpropagate, update
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # keeping the scalar loss value of this batch
            batch_losses.append(loss.detach().item())

        # the mini-epoch loss is the average over its batches
        return np.mean(batch_losses)

    def create_model(self):
        """
        Create the student model and the extra components required by the
        selected KD loss (teacher model, or projection layer and centroids).
        """
        # student model, placed on the training device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'],
                               sum_method=4,
                               final_activation=3)
        self.model.to(self.device)

        # KD-specific components stay None unless the chosen loss needs them
        self.teacher = None
        self.lp_layer = None
        self.centroids = None

        kd_mode = self.cfg['kd_loss']
        if kd_mode == 'distance':
            # distance-based KD: a pre-trained teacher with 16000-dimensional
            # embeddings is loaded and kept in evaluation mode
            self.teacher = MOVEModel(emb_size=16000,
                                     sum_method=4,
                                     final_activation=3)
            teacher_weights = torch.load(
                os.path.join(self.cfg['main_path'],
                             'saved_models/model_move.pt'),
                map_location='cpu')
            self.teacher.load_state_dict(teacher_weights)
            self.teacher.to(self.device)
            self.teacher.eval()
        elif kd_mode == 'cluster':
            # cluster-based KD: a bias-free linear projection from 16000
            # dimensions to the student embedding size, plus class centroids
            self.lp_layer = nn.Linear(in_features=16000,
                                      out_features=self.cfg['emb_size'],
                                      bias=False)
            self.lp_layer.to(self.device)
            self.centroids = torch.load(
                os.path.join(self.cfg['main_path'], 'data/centroids.pt'))

        # counting and reporting the parameters of the student model
        self.num_params = sum(
            np.prod(weights.size()) for weights in self.model.parameters())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Create the optimizer selected by cfg['optimizer'].

        The student parameters are always optimized. For the cluster-based KD
        loss the parameters of the linear projection layer are optimized as
        well; the distance-based KD loss adds no extra parameters.
        """
        # student parameters come first
        trainable = list(self.model.parameters())

        # the projection layer is trained only for the cluster-based KD loss
        if self.cfg['kd_loss'] == 'cluster':
            trainable.extend(self.lp_layer.parameters())

        # 0 -> SGD with momentum, 1 -> Ranger, anything else -> no optimizer
        optimizer_id = self.cfg['optimizer']
        if optimizer_id == 0:
            self.optimizer = torch.optim.SGD(
                trainable,
                lr=self.cfg['learning_rate'],
                momentum=self.cfg['momentum'])
        elif optimizer_id == 1:
            self.optimizer = Ranger(trainable, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
def evaluate(exp_name, exp_type, main_path, emb_size, loss, data_dir):
    """
    Compute L2-normalized embeddings for the feature files found in data_dir.

    Items for which the device transfer, the forward pass or the accumulation
    raises are reported and left out, so the returned mapping always matches
    the rows of the returned embedding tensor.

    :param exp_name: name of the experiment; selects 'model_{exp_name}.pt'
    :param exp_type: type of experiment; selects the '{exp_type}_models' folder
    :param main_path: main working directory that contains 'saved_models'
    :param emb_size: the size of the final embeddings produced by the model
    :param loss: not used by this function; kept for interface compatibility
    :param data_dir: directory that is scanned with enumerate_h5_files
    :return: tuple (embeddings as a CPU tensor, dictionary that maps the name
             of each embedded file, i.e. its path relative to data_dir without
             extension, to its row index in the embeddings)
    """
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    print('Evaluating model {}.'.format(exp_name))

    # collecting the feature files, ordered by their base name
    file_list = enumerate_h5_files(data_dir)
    file_list.sort(key=lambda x: os.path.splitext(os.path.basename(x))[0])
    print("Number feature files: {}".format(len(file_list)))

    # an item is identified by its path relative to data_dir, no extension
    name = [
        os.path.splitext(os.path.relpath(path, data_dir))[0]
        for path in file_list
    ]
    print("name: {}".format(name))

    data = []
    for path in tqdm(file_list):
        temp_crema = dd.io.load(path)["crema"]
        # keeping every 8th row of the feature matrix, then transposing
        idxs = np.arange(0, temp_crema.shape[0], 8)
        temp_tensor = torch.from_numpy(temp_crema[idxs].T)
        # stacking the features twice along the first axis and keeping the
        # first 23 rows; presumably crema has 12 bins, giving 23 of the 24
        # stacked rows -- TODO confirm against the feature extractor
        data.append(torch.cat((temp_tensor, temp_tensor))[:23].unsqueeze(0))

    test_set = FullSizeInstanceDataset(data=data)
    # batch_size=1 and shuffle=False keep batch_idx aligned with 'name'
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    print("Initializing model")
    # initializing the model
    model = MOVEModel(emb_size=emb_size)

    # loading a pre-trained model
    model_name = os.path.join(main_path, 'saved_models',
                              '{}_models'.format(exp_type),
                              'model_{}.pt'.format(exp_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))
    # sending the model to gpu, if available
    model.to(device)

    # names of the items that were embedded successfully, in row order
    kept_names = []

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in tqdm(enumerate(test_loader)):
            try:
                # sending the items to the proper device
                item = handle_device(item, device)

                # forward pass of the model
                emb = model(item)

                # appending the current embedding to the collection
                embed_all = torch.cat((embed_all, emb))
            except Exception as e:
                # deliberate best-effort: a failing item is reported and
                # skipped instead of aborting the whole evaluation
                print("Error: {}, input shape: {}, index: {}".format(
                    e, getattr(item, 'shape', None), batch_idx))
                continue
            kept_names.append(name[batch_idx])

    print("name length: {}".format(len(kept_names)))
    image_with_index_list = {
        item_name: row for row, item_name in enumerate(kept_names)
    }

    # without any successful item embed_all is still the 1-D empty tensor,
    # for which normalizing along dim=1 would raise
    if embed_all.numel() > 0:
        embed_all = F.normalize(embed_all, p=2, dim=1)
    return embed_all.cpu(), image_with_index_list
class LSRTrainer(BaseTrainer):
    """
    Trainer for the Latent Space Reconfiguration (LSR) experiments.

    A pre-trained base model receives a new final linear layer; the base
    parameters start frozen and are released later during training.
    """

    def __init__(self, cfg, experiment_name):
        """
        Set up the trainer by delegating everything to BaseTrainer.
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Run the training loop for a single mini-epoch.
        :return: mean of the per-batch loss values of this mini-epoch
        """
        # the model is optimized, so it has to be in training mode
        self.model.train()

        # one loss value is collected per iteration
        batch_losses = []

        # create_model() freezes the base model, so at first only the new
        # linear layer is trained; once current_epoch equals 1 every parameter
        # of the model becomes trainable
        # (presumably current_epoch is zero-based -- confirm in BaseTrainer)
        if self.current_epoch == 1:
            for weights in self.model.parameters():
                weights.requires_grad = True

        # batch that is replayed when over-fitting on a single batch
        pinned_batch = None

        for step, batch in enumerate(self.data_loader):
            # with overfit_batch == 1 the very first batch is reused at every
            # step; a quick over-fit is the expected outcome and its absence
            # hints at an optimization problem
            if self.cfg['overfit_batch'] == 1:
                if step == 0:
                    pinned_batch = batch
                batch = pinned_batch

            # moving the data and the labels to the training device
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass: one embedding per item of the batch
            embs = self.model(items)

            # loss of the current iteration, chosen through cfg['loss']
            loss_fn = LOSS_DICT[self.cfg['loss']]
            loss = loss_fn(data=embs,
                           labels=labels,
                           emb_size=self.model.fin_emb_size,
                           proxies=self.proxies,
                           margin=self.cfg['margin'],
                           mining_strategy=self.cfg['mining_strategy'])

            # usual optimization step: reset gradients, backpropagate, update
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # keeping the scalar loss value of this batch
            batch_losses.append(loss.detach().item())

        # the mini-epoch loss is the average over its batches
        return np.mean(batch_losses)

    def create_model(self):
        """
        Load the pre-trained base model, freeze it and replace its final
        linear and batch normalization layers with freshly initialized ones.
        """
        # the base model is created with 16000-dimensional embeddings and
        # filled with the stored weights
        self.model = MOVEModel(emb_size=16000,
                               sum_method=4,
                               final_activation=3)
        base_weights = torch.load(
            os.path.join(self.cfg['main_path'], 'saved_models/model_move.pt'),
            map_location='cpu')
        self.model.load_state_dict(base_weights)

        # every parameter loaded so far is frozen
        for weights in self.model.parameters():
            weights.requires_grad = False

        # fresh (trainable) linear layer and a non-affine batch normalization
        # that produce embeddings of the configured size
        target_size = self.cfg['emb_size']
        self.model.lin1 = torch.nn.Linear(in_features=256,
                                          out_features=target_size,
                                          bias=False)
        self.model.lin_bn = torch.nn.BatchNorm1d(target_size, affine=False)

        # the model has to know its new embedding size
        self.model.fin_emb_size = target_size

        # placing the finished model on the training device
        self.model.to(self.device)

        # counting and reporting the parameters of the new model
        self.num_params = sum(
            np.prod(weights.size()) for weights in self.model.parameters())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Create the optimizer selected by cfg['optimizer'].

        Two parameter groups are used so that they can have different
        learning rates: the parameters of the new linear layer (plus the
        proxies, if any) use cfg['learning_rate'], while all remaining model
        parameters use cfg['finetune_learning_rate'].
        """
        # splitting the model parameters by name into the two groups
        new_layer_names = ['lin1.weight', 'lin1.bias']
        new_param = []
        finetune_param = []
        for param_name, weights in self.model.named_parameters():
            if param_name in new_layer_names:
                new_param.append(weights)
            else:
                finetune_param.append(weights)

        # proxy-based losses (ids 1, 2, 3) need a learnable proxy matrix;
        # presumably 14499 is the number of training classes -- TODO confirm
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            initial_proxies = torch.randn(14499,
                                          self.cfg['emb_size'],
                                          requires_grad=True,
                                          device=self.device)
            self.proxies = torch.nn.Parameter(initial_proxies)
            new_param.append(self.proxies)

        # the second group defines no 'lr' of its own and therefore takes the
        # learning rate that is handed to the optimizer
        opt_params = [
            {
                'params': finetune_param,
                'lr': self.cfg['finetune_learning_rate'],
            },
            {
                'params': new_param,
            },
        ]

        # 0 -> SGD with momentum, 1 -> Ranger, anything else -> no optimizer
        optimizer_id = self.cfg['optimizer']
        if optimizer_id == 0:
            self.optimizer = torch.optim.SGD(
                opt_params,
                lr=self.cfg['learning_rate'],
                momentum=self.cfg['momentum'])
        elif optimizer_id == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
def evaluate(exp_name, exp_type, main_path, emb_size, loss):
    """
    Evaluate a trained MOVE model on the benchmark set.

    The embeddings of all benchmark items are computed, turned into pairwise
    distances with the function that matches the training loss, and handed to
    average_precision. The elapsed time is printed at the end.
    :param exp_name: name of the experiment; selects the stored model file
    :param exp_type: type of experiment; selects the folder of the model file
    :param main_path: main working directory
    :param emb_size: the size of the final embeddings produced by the model
    :param loss: id of the loss used for training (0 or 1: Euclidean distance,
                 2: cosine similarity, otherwise: Pearson coefficient)
    """
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    eval_dataset = os.path.join(main_path, 'data/benchmark_crema.pt')
    print('Evaluating model {} on dataset {}.'.format(exp_name, eval_dataset))

    # building the model and filling it with the stored weights
    checkpoint_path = os.path.join(main_path, 'saved_models',
                                   '{}_models'.format(exp_type),
                                   'model_{}.pt'.format(exp_name))
    model = MOVEModel(emb_size=emb_size)
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))

    # the model runs on the gpu whenever one is available
    model.to(device)

    # benchmark data, wrapped in a dataset that is read one item at a time
    test_data, test_labels = import_dataset_from_pt(filename=eval_dataset,
                                                    suffix=False)
    test_set = FullSizeInstanceDataset(data=test_data)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    start_time = time.monotonic()

    # no gradients are needed for evaluation
    with torch.no_grad():
        model.eval()

        # the embeddings are stacked row by row, starting from an empty tensor
        embed_all = torch.tensor([], device=device)
        for batch_idx, item in enumerate(test_loader):
            item = handle_device(item, device)
            emb = model(item)
            embed_all = torch.cat((embed_all, emb))

        if loss in [0, 1]:
            # Triplet and ProxyNCA losses: Euclidean distance, divided by the
            # embedding size of the model
            dist_all = pairwise_euclidean_distance(embed_all)
            dist_all /= model.fin_emb_size
        elif loss == 2:
            # NormalizedSoftmax loss: negated cosine similarity
            dist_all = -1 * pairwise_cosine_similarity(embed_all)
        else:
            # Group loss: negated Pearson correlation coefficient
            dist_all = -1 * pairwise_pearson_coef(embed_all)

    # negated distances act as similarity scores; the diagonal is set to -inf
    # so that an item is never scored against itself
    similarity = -1 * dist_all.cpu().float().clone()
    self_mask = torch.diag(torch.ones(len(test_data)) * float('-inf'))
    average_precision(similarity + self_mask, dataset=1)

    test_time = time.monotonic() - start_time

    print('Total time: {:.0f}m{:.0f}s.'.format(test_time // 60,
                                               test_time % 60))
class MOVETrainer(BaseTrainer):
    """
    Trainer for the baseline experiments with MOVE.
    """

    def __init__(self, cfg, experiment_name):
        """
        Set up the trainer by delegating everything to BaseTrainer.
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Run the training loop for a single mini-epoch.
        :return: mean of the per-batch loss values of this mini-epoch
        """
        # the model is optimized, so it has to be in training mode
        self.model.train()

        # one loss value is collected per iteration
        batch_losses = []

        # batch that is replayed when over-fitting on a single batch
        pinned_batch = None

        for step, batch in enumerate(self.data_loader):
            # with overfit_batch == 1 the very first batch is reused at every
            # step; a quick over-fit is the expected outcome and its absence
            # hints at an optimization problem
            if self.cfg['overfit_batch'] == 1:
                if step == 0:
                    pinned_batch = batch
                batch = pinned_batch

            # moving the data and the labels to the training device
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass: one embedding per item of the batch
            embs = self.model(items)

            # loss of the current iteration, chosen through cfg['loss']
            loss_fn = LOSS_DICT[self.cfg['loss']]
            loss = loss_fn(data=embs,
                           labels=labels,
                           emb_size=self.model.fin_emb_size,
                           proxies=self.proxies,
                           margin=self.cfg['margin'],
                           mining_strategy=self.cfg['mining_strategy'])

            # usual optimization step: reset gradients, backpropagate, update
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # keeping the scalar loss value of this batch
            batch_losses.append(loss.detach().item())

        # the mini-epoch loss is the average over its batches
        return np.mean(batch_losses)

    def create_model(self):
        """
        Create the model to optimize and report its number of parameters.
        """
        # the model is built with the configured embedding size and placed
        # on the training device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'])
        self.model.to(self.device)

        # counting and reporting the parameters of the model
        self.num_params = sum(
            np.prod(weights.size()) for weights in self.model.parameters())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Create the optimizer selected by cfg['optimizer'].

        All model parameters are optimized; for proxy-based losses the proxy
        matrix is created here and optimized as well.
        """
        # every parameter of the model is trained
        trainable = list(self.model.parameters())

        # proxy-based losses (ids 1, 2, 3) need a learnable proxy matrix;
        # presumably 14499 is the number of training classes -- TODO confirm
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            initial_proxies = torch.randn(14499,
                                          self.cfg['emb_size'],
                                          requires_grad=True,
                                          device=self.device)
            self.proxies = torch.nn.Parameter(initial_proxies)
            trainable.append(self.proxies)

        # 0 -> SGD with momentum, 1 -> Ranger, anything else -> no optimizer
        optimizer_id = self.cfg['optimizer']
        if optimizer_id == 0:
            self.optimizer = torch.optim.SGD(
                trainable,
                lr=self.cfg['learning_rate'],
                momentum=self.cfg['momentum'])
        elif optimizer_id == 1:
            self.optimizer = Ranger(trainable, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
class PruningTrainer(BaseTrainer):
    """
    Trainer for the Pruning experiments.

    Several full training cycles are run from the same initial weights; after
    each cycle, rows of the final linear layer are selected and masked with
    zeros during the following cycle.
    """

    def __init__(self, cfg, experiment_name):
        """
        Set up the trainer by delegating everything to BaseTrainer.
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        super().__init__(cfg, experiment_name)

    def train(self, save_logs=True):
        """
        Main training function for the Pruning experiments.

        Overrides BaseTrainer.train to wrap it into pruning cycles: each cycle
        is a complete training run followed by the selection of the rows to
        prune in the next cycle.
        :param save_logs: whether to save training and validation loss logs
        """
        # every pruning cycle after the first restarts from these weights
        initial_weights_path = os.path.join(
            self.cfg['main_path'], 'saved_models', 'pruning_models',
            'model_{}_initial.pt'.format(self.experiment_name))
        torch.save(self.model.state_dict(), initial_weights_path)

        # one unpruned cycle plus cfg['pruning_iterations'] pruned ones
        for prune_iteration in range(self.cfg['pruning_iterations'] + 1):
            self.prune_iteration = prune_iteration

            # from the second cycle on, the initial weights are restored
            if prune_iteration > 0:
                self.model.load_state_dict(torch.load(initial_weights_path))

            # the learning rate goes back to its configured starting value
            for group in self.optimizer.param_groups:
                group['lr'] = self.cfg['learning_rate']

            # a fresh learning rate schedule for the new cycle
            self.create_lr_scheduler()

            # one complete training cycle; logs are written once at the end
            super().train(save_logs=False)

            # choosing the rows of the linear layer to prune, based on the
            # model that was just trained
            self.select_indices_to_prune()

        if save_logs:
            with open(
                    './experiment_logs/{}_logs/{}.json'.format(
                        self.cfg['exp_type'], self.experiment_name),
                    'w') as f:
                json.dump(
                    {
                        'train_loss_log': self.train_loss_log,
                        'val_loss_log': self.val_loss_log
                    }, f)

    def handle_training_batches(self):
        """
        Run the training loop for a single mini-epoch.
        :return: mean of the per-batch loss values of this mini-epoch
        """
        # the model is optimized, so it has to be in training mode
        self.model.train()

        # one loss value is collected per iteration
        batch_losses = []

        # batch that is replayed when over-fitting on a single batch
        pinned_batch = None

        for step, batch in enumerate(self.data_loader):
            # with overfit_batch == 1 the very first batch is reused at every
            # step; a quick over-fit is the expected outcome and its absence
            # hints at an optimization problem
            if self.cfg['overfit_batch'] == 1:
                if step == 0:
                    pinned_batch = batch
                batch = pinned_batch

            # moving the data and the labels to the training device
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass: one embedding per item of the batch
            embs = self.model(items)

            # triplet loss of the current iteration
            loss = triplet_loss(
                data=embs,
                labels=labels,
                emb_size=self.cfg['emb_size'],
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # usual optimization step: reset gradients, backpropagate, update
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # pruned rows are forced back to zero after every update
            if self.prune_iteration > 0:
                self.apply_mask()

            # keeping the scalar loss value of this batch
            batch_losses.append(loss.detach().item())

        # the mini-epoch loss is the average over its batches
        return np.mean(batch_losses)

    def apply_mask(self):
        """
        Multiply the weights of the linear layer with the mask to 'prune'
        the selected rows, and update the embedding size of the model.
        """
        linear = self.model.lin1
        linear.weight.data = linear.weight.data * self.mask

        # the embedding size is the number of rows minus the number of rows
        # listed for the current pruning iteration
        pruned_rows = NUM_OF_ROWS_TO_PRUNE[self.prune_iteration]
        self.model.fin_emb_size = linear.weight.shape[0] - pruned_rows

    def select_indices_to_prune(self):
        """
        Select the rows of the linear layer to prune, based on the trained
        model, and build the matching zero-mask.
        :return: nothing; the result is stored in self.indices_to_prune and
                 self.mask
        """
        weight = self.model.lin1.weight

        # rows with the smallest mean absolute weight are selected
        row_scores = torch.abs(weight).mean(dim=1)
        selection = torch.topk(row_scores,
                               k=NUM_OF_ROWS_TO_PRUNE[self.prune_iteration],
                               largest=False)
        self.indices_to_prune = selection.indices

        # the mask starts as all ones, in the shape of the weight matrix
        mask = torch.ones(weight.shape)
        zero_row = torch.zeros(1, weight.shape[1])

        # both tensors have to live on the training device
        mask = handle_device(mask, self.device)
        zero_row = handle_device(zero_row, self.device)

        # selected rows are zeroed out
        mask[self.indices_to_prune] = zero_row
        self.mask = mask

    def create_model(self):
        """
        Create the model to optimize and report its number of parameters.
        """
        # the model is built with 16000-dimensional embeddings and placed on
        # the training device
        self.model = MOVEModel(emb_size=16000)
        self.model.to(self.device)

        # counting and reporting the parameters of the new model
        self.num_params = sum(
            np.prod(weights.size()) for weights in self.model.parameters())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Create the optimizer selected by cfg['optimizer'].
        """
        # 0 -> SGD with momentum, 1 -> Ranger, anything else -> no optimizer
        optimizer_id = self.cfg['optimizer']
        if optimizer_id == 0:
            self.optimizer = torch.optim.SGD(
                self.model.parameters(),
                lr=self.cfg['learning_rate'],
                momentum=self.cfg['momentum'])
        elif optimizer_id == 1:
            self.optimizer = Ranger(self.model.parameters(),
                                    lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None