Example #1
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    print("Initializing model")

    # initializing the model
    model = MOVEModel(emb_size=args.emb_size)

    # loading a pre-trained model
    model_name = os.path.join(args.main_path, 'saved_models',
                              '{}_models'.format(args.exp_type),
                              'model_{}.pt'.format(experiment_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    remove_items = []

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in tqdm(enumerate(test_loader)):
            try:
                # sending the items to the proper device
                item = handle_device(item, device)
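
The test_set consumed by the DataLoader above is presumably a FullSizeInstanceDataset, as in Examples #3 and #5 below; the dataset class itself is not part of this excerpt. A minimal sketch of what it might look like, assuming it simply wraps a list of pre-computed feature tensors:

from torch.utils.data import Dataset


class FullSizeInstanceDataset(Dataset):
    """
    Minimal sketch (assumption): wraps a list of full-length feature tensors
    and returns each one as float32, so a batch_size=1 loader can handle
    variable-length inputs.
    """
    def __init__(self, data):
        # data: list of tensors, one per track, possibly of different lengths
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # returning the full-size feature tensor of the requested track
        return self.data[idx].float()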
Example #2
class KDTrainer(BaseTrainer):
    """
    Trainer object for Knowledge Distillation experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, the same batch is used for every iteration.
            # this helps to verify that the optimization is set up correctly:
            # the model is expected to over-fit that batch quickly.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the student model
            # obtaining the embeddings of each item in the batch
            embs_s = self.model(items)

            # if the distance-based KD loss is chosen,
            # we obtain the embeddings of each item from the teacher model
            with torch.no_grad():
                embs_t = self.teacher(
                    items) if self.cfg['kd_loss'] == 'distance' else None

            # calculating the KD loss for the iteration
            kd_loss = KD_LOSS_DICT[self.cfg['kd_loss']](
                embs_s=embs_s,
                embs_t=embs_t,
                emb_size=self.cfg['emb_size'],
                lp_layer=self.lp_layer,
                labels=labels,
                centroids=self.centroids)

            # calculating the triplet loss for the iteration
            main_loss = triplet_loss(
                data=embs_s,
                labels=labels,
                emb_size=self.cfg['emb_size'],
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # summing KD and triplet loss values
            loss = kd_loss + main_loss

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the student model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'],
                               sum_method=4,
                               final_activation=3)
        self.model.to(self.device)

        # initializing necessary models/data for KD training
        self.teacher = None
        self.lp_layer = None
        self.centroids = None

        # creating the teacher model and sending it to the proper device
        # this step is for the distance-based KD training
        if self.cfg['kd_loss'] == 'distance':
            self.teacher = MOVEModel(emb_size=16000,
                                     sum_method=4,
                                     final_activation=3)
            self.teacher.load_state_dict(
                torch.load(os.path.join(self.cfg['main_path'],
                                        'saved_models/model_move.pt'),
                           map_location='cpu'))
            self.teacher.to(self.device)
            self.teacher.eval()

        # creating the linear projection layer and loading the class centroids
        # this step is for the cluster-based KD training
        elif self.cfg['kd_loss'] == 'cluster':
            self.lp_layer = nn.Linear(in_features=16000,
                                      out_features=self.cfg['emb_size'],
                                      bias=False)
            self.lp_layer.to(self.device)
            self.centroids = torch.load(
                os.path.join(self.cfg['main_path'], 'data/centroids.pt'))

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        In the case of distance-based KD training, no additional parameters are given to the optimizer.
        In the case of cluster-based KD training, the parameters of the linear projection layer are updated,
        as well as the parameters of the student model.
        """
        # getting the parameters of the student model
        opt_params = list(self.model.parameters())

        # for the cluster-based KD training, append the parameters of
        # the linear projection layer for the optimizer
        if self.cfg['kd_loss'] == 'cluster':
            opt_params += list(self.lp_layer.parameters())

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
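
KD_LOSS_DICT and the distance-based KD loss used above are not included in this excerpt. A minimal sketch of what the distance-based variant could look like, assuming it penalizes the mismatch between the pairwise distance matrices of the student and teacher embeddings (the exact formulation in the original code may differ); the unused keyword arguments only mirror the call in handle_training_batches:

import torch
import torch.nn.functional as F


def distance_kd_loss(embs_s, embs_t, emb_size=None, lp_layer=None,
                     labels=None, centroids=None):
    """
    Sketch of a distance-based KD loss (assumption): make the pairwise
    Euclidean distance structure of the student space match the teacher space.
    """
    # pairwise Euclidean distances within the batch, for student and teacher
    dist_s = torch.cdist(embs_s, embs_s, p=2)
    dist_t = torch.cdist(embs_t, embs_t, p=2)

    # normalizing by the embedding size, as done for distances at evaluation time
    dist_s = dist_s / embs_s.shape[1]
    dist_t = dist_t / embs_t.shape[1]

    # penalizing the discrepancy between the two distance matrices
    return F.mse_loss(dist_s, dist_t)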
Example #3
def evaluate(exp_name,
             exp_type,
             main_path,
             emb_size,
             loss,
             data_dir):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    print('Evaluating model {}.'.format(exp_name))

    file_list = enumerate_h5_files(data_dir)
    file_list.sort(key=lambda x: os.path.splitext(os.path.basename(x))[0])
    print("Number feature files: {}".format(len(file_list)))

    data = []
    name = list(map(lambda x: os.path.splitext(os.path.relpath(x, data_dir))[0], file_list))
    print("name: {}".format(name))
    #image_with_index_list = dict(zip(name, range(len(name))))
    #print("image_with_index_list: {}".format(image_with_index_list))

    for file in tqdm(file_list):
        temp_crema = dd.io.load(file)["crema"]
        #print("crema shape: {}".format(temp_crema.shape))
        idxs = np.arange(0, temp_crema.shape[0], 8)

        temp_tensor = torch.from_numpy(temp_crema[idxs].T)

        data.append(torch.cat((temp_tensor, temp_tensor))[:23].unsqueeze(0))
        #name.append(os.path.splitext(os.path.basename(file))[0])

    test_set = FullSizeInstanceDataset(data=data)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    print("Initializing model")

    # initializing the model
    model = MOVEModel(emb_size=emb_size)

    # loading a pre-trained model
    model_name = os.path.join(main_path, 'saved_models', '{}_models'.format(exp_type), 'model_{}.pt'.format(exp_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    model.to(device)

    remove_items = []

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in tqdm(enumerate(test_loader)):
            try:
                # sending the items to the proper device
                item = handle_device(item, device)

                # forward pass of the model
                # obtaining the embeddings of each item in the batch
                emb = model(item)

                # appending the current embedding to the collection of embeddings
                embed_all = torch.cat((embed_all, emb))
            except Exception as e:
                print("Error: {}, input shape: {}, index".format(e, item.shape, batch_idx))
                remove_items.append(name[batch_idx])
                continue
        for re_item in remove_items:
            name.remove(re_item)
            print("name length: {}".format(len(name)))
        image_with_index_list = dict(zip(name, range(len(name))))

        embed_all = F.normalize(embed_all, p=2, dim=1)

    return embed_all.cpu(), image_with_index_list
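
handle_device is called throughout these examples but not defined in the excerpt. Based on the comments above ('making sure the data and labels are on the correct device and in float32 type'), a minimal sketch of what it might do:

def handle_device(item, device):
    """
    Sketch (assumption): move a tensor, or a list/tuple of tensors,
    to the target device as float32.
    """
    if isinstance(item, (list, tuple)):
        return type(item)(handle_device(i, device) for i in item)
    return item.float().to(device)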
Example #4
class LSRTrainer(BaseTrainer):
    """
    Trainer object for Latent Space Reconfiguration experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # for the first epoch, only the new linear layer is trained
        # (all other parameters are frozen in create_model).
        # starting from the second epoch, all the parameters of the model are trained.
        if self.current_epoch == 1:
            for param in self.model.parameters():
                param.requires_grad = True

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, the same batch is used for every iteration.
            # this helps to verify that the optimization is set up correctly:
            # the model is expected to over-fit that batch quickly.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = LOSS_DICT[self.cfg['loss']](
                data=embs,
                labels=labels,
                emb_size=self.model.fin_emb_size,
                proxies=self.proxies,
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating and loading the learned parameters of the MOVE model
        # this model stands as our base model
        self.model = MOVEModel(emb_size=16000,
                               sum_method=4,
                               final_activation=3)
        self.model.load_state_dict(
            torch.load(os.path.join(self.cfg['main_path'],
                                    'saved_models/model_move.pt'),
                       map_location='cpu'))

        # freezing all the parameters of the base model
        for param in self.model.parameters():
            param.requires_grad = False

        # creating a new linear layer and a new batch normalization layer
        self.model.lin1 = torch.nn.Linear(in_features=256,
                                          out_features=self.cfg['emb_size'],
                                          bias=False)
        self.model.lin_bn = torch.nn.BatchNorm1d(self.cfg['emb_size'],
                                                 affine=False)

        # setting the embedding size of the model
        self.model.fin_emb_size = self.cfg['emb_size']

        # sending the model to the proper device
        self.model.to(self.device)

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer. For LSR training, we have two types of parameters.
        'new_param' are the ones from the new linear layer,
        and 'finetune_param' are the ones from the 'feature extractor' part of MOVE model.
        By distinguishing them, we can set different learning rates for each parameter group.
        """
        # getting parameter groups as explained above
        param_list = ['lin1.weight', 'lin1.bias']
        new_param = [
            par[1] for par in self.model.named_parameters()
            if par[0] in param_list
        ]
        finetune_param = [
            par[1] for par in self.model.named_parameters()
            if par[0] not in param_list
        ]

        # initializing proxies if a proxy-based loss is used
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            self.proxies = torch.nn.Parameter(
                torch.randn(14499,
                            self.cfg['emb_size'],
                            requires_grad=True,
                            device=self.device))
            new_param.append(self.proxies)

        # setting the proper learning rates and initializing the optimizer
        opt_params = [{
            'params': finetune_param,
            'lr': self.cfg['finetune_learning_rate']
        }, {
            'params': new_param
        }]

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
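
The proxies created in create_optimizer are consumed by the proxy-based entries of LOSS_DICT (losses 1-3), which are not part of this excerpt. A minimal ProxyNCA-style sketch under the assumption of one proxy per class and squared Euclidean distances; the actual proxy-based losses in the original code may differ, and the unused keyword arguments only mirror the LOSS_DICT call above:

import torch
import torch.nn.functional as F


def proxy_nca_loss(data, labels, proxies, emb_size=None, margin=None,
                   mining_strategy=None):
    """
    Sketch of a ProxyNCA-style loss (assumption): pull each embedding towards
    the proxy of its class and push it away from all other proxies.
    """
    # L2-normalizing embeddings and proxies
    embs = F.normalize(data, p=2, dim=1)
    prox = F.normalize(proxies, p=2, dim=1)

    # squared Euclidean distances from each embedding to every proxy
    dists = torch.cdist(embs, prox, p=2) ** 2

    # cross-entropy over proxies, with the proxy of the true class as target
    return F.cross_entropy(-dists, labels.long().view(-1))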
Example #5
def evaluate(exp_name, exp_type, main_path, emb_size, loss):
    """
    Main evaluation function of MOVE. For a detailed explanation of parameters,
    please check 'python move_main.py --help'
    :param exp_name: name to save model and experiment summary
    :param exp_type: type of experiment
    :param main_path: main working directory
    :param emb_size: the size of the final embeddings produced by the model
    :param loss: the loss used for training the model
    """

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    eval_dataset = os.path.join(main_path, 'data/benchmark_crema.pt')

    print('Evaluating model {} on dataset {}.'.format(exp_name, eval_dataset))

    # initializing the model
    model = MOVEModel(emb_size=emb_size)

    # loading a pre-trained model
    model_name = os.path.join(main_path, 'saved_models',
                              '{}_models'.format(exp_type),
                              'model_{}.pt'.format(exp_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    model.to(device)

    # loading test data, initializing the dataset object and the data loader
    test_data, test_labels = import_dataset_from_pt(filename=eval_dataset,
                                                    suffix=False)
    test_set = FullSizeInstanceDataset(data=test_data)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    start_time = time.monotonic()

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in enumerate(test_loader):
            # sending the items to the proper device
            item = handle_device(item, device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            emb = model(item)

            # appending the current embedding to the collection of embeddings
            embed_all = torch.cat((embed_all, emb))

        # if Triplet or ProxyNCA loss is used, the distance function is Euclidean distance
        if loss in [0, 1]:
            dist_all = pairwise_euclidean_distance(embed_all)
            dist_all /= model.fin_emb_size
        # if NormalizedSoftmax loss is used, the distance function is cosine distance
        elif loss == 2:
            dist_all = -1 * pairwise_cosine_similarity(embed_all)
        # if Group loss is used, the distance function is Pearson correlation coefficient
        else:
            dist_all = -1 * pairwise_pearson_coef(embed_all)

    # computing evaluation metrics from the obtained distances
    average_precision(-1 * dist_all.cpu().float().clone() +
                      torch.diag(torch.ones(len(test_data)) * float('-inf')),
                      dataset=1)

    test_time = time.monotonic() - start_time

    print('Total time: {:.0f}m{:.0f}s.'.format(test_time // 60,
                                               test_time % 60))
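
pairwise_euclidean_distance, pairwise_cosine_similarity and pairwise_pearson_coef are helper functions that are not shown in this excerpt. Minimal sketches of how they could be implemented, assuming each returns a full N x N matrix over the embedding set, as the usage above suggests:

import torch
import torch.nn.functional as F


def pairwise_euclidean_distance(embs):
    # (N, N) matrix of Euclidean distances between all pairs of embeddings
    return torch.cdist(embs, embs, p=2)


def pairwise_cosine_similarity(embs):
    # (N, N) matrix of cosine similarities between all pairs of embeddings
    embs = F.normalize(embs, p=2, dim=1)
    return embs @ embs.t()


def pairwise_pearson_coef(embs):
    # (N, N) matrix of Pearson correlation coefficients between embeddings
    centered = embs - embs.mean(dim=1, keepdim=True)
    centered = F.normalize(centered, p=2, dim=1)
    return centered @ centered.t()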
Example #6
class MOVETrainer(BaseTrainer):
    """
    Trainer object for baseline experiments with MOVE.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, the same batch is used for every iteration.
            # this helps to verify that the optimization is set up correctly:
            # the model is expected to over-fit that batch quickly.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = LOSS_DICT[self.cfg['loss']](
                data=embs,
                labels=labels,
                emb_size=self.model.fin_emb_size,
                proxies=self.proxies,
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'])
        self.model.to(self.device)

        # computing and printing the total number of parameters of the model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        """
        # parameters to train
        opt_params = list(self.model.parameters())

        # initializing proxies if a proxy-based loss is used
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            self.proxies = torch.nn.Parameter(
                torch.randn(14499,
                            self.cfg['emb_size'],
                            requires_grad=True,
                            device=self.device))
            opt_params.append(self.proxies)

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
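
All trainers above read their hyper-parameters from a cfg dictionary. A hypothetical minimal configuration illustrating the keys used in these examples; the key names come from the code above, but the values are only placeholders:

cfg = {
    'main_path': '.',        # main working directory
    'exp_type': 'move',      # type of experiment (used in save/log paths)
    'emb_size': 256,         # size of the final embeddings
    'loss': 0,               # 0: Triplet, 1: ProxyNCA, 2: NormalizedSoftmax, 3: Group
    'margin': 1.0,           # margin for the triplet loss
    'mining_strategy': 2,    # triplet mining strategy
    'optimizer': 0,          # 0: SGD, 1: Ranger
    'learning_rate': 0.1,
    'momentum': 0.9,
    'overfit_batch': 0,      # set to 1 to train repeatedly on a single batch
}

The KD, LSR and pruning trainers additionally read keys such as 'kd_loss', 'finetune_learning_rate' and 'pruning_iterations'.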
Example #7
class PruningTrainer(BaseTrainer):
    """
    Trainer object for Pruning experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def train(self, save_logs=True):
        """
        Main training function for Pruning experiments.
        It overrides the training function of the BaseTrainer for adding
        pruning-related functionality.
        :param save_logs: whether to save training and validation loss logs
        """
        # save the initial parameters of the model for other pruning iterations
        torch.save(
            self.model.state_dict(),
            os.path.join(self.cfg['main_path'], 'saved_models',
                         'pruning_models',
                         'model_{}_initial.pt'.format(self.experiment_name)))

        # iterating full-training cycles for pruning
        for prune_iteration in range(self.cfg['pruning_iterations'] + 1):
            self.prune_iteration = prune_iteration

            # loading the initial parameters of the model
            if prune_iteration > 0:
                self.model.load_state_dict(
                    torch.load(
                        os.path.join(
                            self.cfg['main_path'], 'saved_models',
                            'pruning_models', 'model_{}_initial.pt'.format(
                                self.experiment_name))))

                # resetting the learning rate
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.cfg['learning_rate']

                # re-creating the learning rate schedule for the new training cycle
                self.create_lr_scheduler()

            # execute a full training cycle
            super().train(save_logs=False)

            # selecting which indices of the linear layer to prune
            # based on the trained model
            self.select_indices_to_prune()

        if save_logs:
            with open(
                    './experiment_logs/{}_logs/{}.json'.format(
                        self.cfg['exp_type'], self.experiment_name), 'w') as f:
                json.dump(
                    {
                        'train_loss_log': self.train_loss_log,
                        'val_loss_log': self.val_loss_log
                    }, f)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, the same batch is used for every iteration.
            # this helps to verify that the optimization is set up correctly:
            # the model is expected to over-fit that batch quickly.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = triplet_loss(data=embs,
                                labels=labels,
                                emb_size=self.cfg['emb_size'],
                                margin=self.cfg['margin'],
                                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # applying the zero-mask to the selected indices
            if self.prune_iteration > 0:
                self.apply_mask()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def apply_mask(self):
        """
        Applying the mask tensor to the linear layer to 'prune' weights.
        """
        self.model.lin1.weight.data = self.model.lin1.weight.data * self.mask
        self.model.fin_emb_size = self.model.lin1.weight.shape[
            0] - NUM_OF_ROWS_TO_PRUNE[self.prune_iteration]

    def select_indices_to_prune(self):
        """
        Selecting which indices to prune based on the trained model,
        i.e. the rows of lin1 with the smallest mean absolute weight.
        """
        self.indices_to_prune = torch.topk(
            torch.abs(self.model.lin1.weight).mean(dim=1),
            k=NUM_OF_ROWS_TO_PRUNE[self.prune_iteration],
            largest=False).indices

        # creating a mask of ones and zeros
        mask = torch.ones(self.model.lin1.weight.shape)
        zero_row = torch.zeros(1, self.model.lin1.weight.shape[1])

        # sending the tensors to the proper device
        mask = handle_device(mask, self.device)
        zero_row = handle_device(zero_row, self.device)

        # finalizing the mask based on the selected indices
        mask[self.indices_to_prune] = zero_row
        self.mask = mask

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the model and sending it to the proper device
        self.model = MOVEModel(emb_size=16000)
        self.model.to(self.device)

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        """
        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(self.model.parameters(),
                                    lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
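
The pruning step above zeroes out the rows of lin1 with the smallest mean absolute weight by multiplying the weight matrix with a mask. A self-contained toy sketch of the same magnitude-based row masking on a plain nn.Linear, independent of the MOVE codebase:

import torch
import torch.nn as nn

# toy linear layer standing in for model.lin1
lin = nn.Linear(in_features=8, out_features=6, bias=False)
num_rows_to_prune = 2

# selecting the rows with the smallest mean absolute weight
indices_to_prune = torch.topk(torch.abs(lin.weight).mean(dim=1),
                              k=num_rows_to_prune,
                              largest=False).indices

# building a mask of ones with the selected rows zeroed out
mask = torch.ones_like(lin.weight)
mask[indices_to_prune] = 0.0

# applying the mask; re-applying it after every optimizer step keeps the
# pruned rows at zero during further training, as in handle_training_batches
with torch.no_grad():
    lin.weight.mul_(mask)

print('pruned rows:', indices_to_prune.tolist())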