Example #1
    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'])
        self.model.to(self.device)

        # computing and printing the total number of parameters of the model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))
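A side note on the parameter count used throughout these examples: the loop above can be cross-checked with torch.Tensor.numel(); a minimal, self-contained sketch (the small nn.Linear below is only a stand-in for MOVEModel):

import numpy as np
import torch

model = torch.nn.Linear(12, 256)  # stand-in module; replace with MOVEModel(...)

# loop form used in the examples above
num_params = 0
for param in model.parameters():
    num_params += np.prod(param.size())

# equivalent count using torch.Tensor.numel()
num_params_alt = sum(p.numel() for p in model.parameters())

assert int(num_params) == num_params_alt
print('Total number of parameters for the model: {:.0f}'.format(num_params))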
Example #2
    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the student model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'],
                               sum_method=4,
                               final_activation=3)
        self.model.to(self.device)

        # initializing necessary models/data for KD training
        self.teacher = None
        self.lp_layer = None
        self.centroids = None

        # creating the teacher model and sending it to the proper device
        # this step is for the distance-based KD training
        if self.cfg['kd_loss'] == 'distance':
            self.teacher = MOVEModel(emb_size=16000,
                                     sum_method=4,
                                     final_activation=3)
            self.teacher.load_state_dict(
                torch.load(os.path.join(self.cfg['main_path'],
                                        'saved_models/model_move.pt'),
                           map_location='cpu'))
            self.teacher.to(self.device)
            self.teacher.eval()

        # creating the linear projection layer and loading the class centroids
        # this step is for the cluster-based KD training
        elif self.cfg['kd_loss'] == 'cluster':
            self.lp_layer = nn.Linear(in_features=16000,
                                      out_features=self.cfg['emb_size'],
                                      bias=False)
            self.lp_layer.to(self.device)
            self.centroids = torch.load(
                os.path.join(self.cfg['main_path'], 'data/centroids.pt'))

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))
Example #3
    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating and loading the learned parameters of the MOVE model
        # this model stands as our base model
        self.model = MOVEModel(emb_size=16000,
                               sum_method=4,
                               final_activation=3)
        self.model.load_state_dict(
            torch.load(os.path.join(self.cfg['main_path'],
                                    'saved_models/model_move.pt'),
                       map_location='cpu'))

        # freezing all the parameters of the base model
        for param in self.model.parameters():
            param.requires_grad = False

        # creating a new linear layer and a new batch normalization layer
        self.model.lin1 = torch.nn.Linear(in_features=256,
                                          out_features=self.cfg['emb_size'],
                                          bias=False)
        self.model.lin_bn = torch.nn.BatchNorm1d(self.cfg['emb_size'],
                                                 affine=False)

        # setting the embedding size of the model
        self.model.fin_emb_size = self.cfg['emb_size']

        # sending the model to the proper device
        self.model.to(self.device)

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))
Example #4
def evaluate(save_name, model_type, emb_size, sum_method, final_activation):
    print("Prepare random dataset")
    num_examples = 1000
    test_data = []
    for i in tqdm(range(num_examples)):
        t_length = np.random.randint(1000, 2000)
        cremaPCP = np.random.rand(t_length, 12)
        cremaPCP_tensor = torch.from_numpy(cremaPCP).t()
        cremaPCP_reshaped = torch.cat(
            (cremaPCP_tensor, cremaPCP_tensor))[:23].unsqueeze(0)
        test_data.append(cremaPCP_reshaped)

    test_map_set = MOVEDatasetFull(data=test_data)
    test_map_loader = DataLoader(test_map_set, batch_size=1, shuffle=False)

    print("Initialize model")
    # initializing the model
    if model_type == 0:
        move_model = MOVEModel(emb_size=emb_size,
                               sum_method=sum_method,
                               final_activation=final_activation)
    elif model_type == 1:
        move_model = MOVEModelNT(emb_size=emb_size,
                                 sum_method=sum_method,
                                 final_activation=final_activation)

    # loading a pre-trained model
    model_name = 'saved_models/model_{}.pt'.format(save_name)

    print("Load model")
    move_model.load_state_dict(torch.load(model_name, map_location='cpu'))
    move_model.eval()

    # sending the model to gpu, if available
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    move_model.to(device)
    print("Run extract feature")
    with torch.no_grad():  # deactivating gradient tracking for testing
        move_model.eval()  # setting the model to evaluation mode

        # tensor for storing all the embeddings obtained from the test set
        embed_all = torch.tensor([], device=device)

        for batch_idx, item in tqdm(enumerate(test_map_loader)):
            # sending the pcp features to the proper device
            item = item.to(device)

            # obtaining the embeddings of each song in the mini-batch
            res_1 = move_model(item)

            # adding the embedding of the current song to the others
            embed_all = torch.cat((embed_all, res_1))

    return embed_all.cpu()
Example #5
class KDTrainer(BaseTrainer):
    """
    Trainer object for Knowledge Distillation experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, only the same batch is trained.
            # this helps to see whether there are any issues with optimization.
            # a fast over-fitting behaviour is expected.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the student model
            # obtaining the embeddings of each item in the batch
            embs_s = self.model(items)

            # if the distance-based KD loss is chosen,
            # we obtain the embeddings of each item from the teacher model
            with torch.no_grad():
                embs_t = self.teacher(
                    items) if self.cfg['kd_loss'] == 'distance' else None

            # calculating the KD loss for the iteration
            kd_loss = KD_LOSS_DICT[self.cfg['kd_loss']](
                embs_s=embs_s,
                embs_t=embs_t,
                emb_size=self.cfg['emb_size'],
                lp_layer=self.lp_layer,
                labels=labels,
                centroids=self.centroids)

            # calculating the triplet loss for the iteration
            main_loss = triplet_loss(
                data=embs_s,
                labels=labels,
                emb_size=self.cfg['emb_size'],
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # summing KD and triplet loss values
            loss = kd_loss + main_loss

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the student model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'],
                               sum_method=4,
                               final_activation=3)
        self.model.to(self.device)

        # initializing necessary models/data for KD training
        self.teacher = None
        self.lp_layer = None
        self.centroids = None

        # creating the teacher model and sending it to the proper device
        # this step is for the distance-based KD training
        if self.cfg['kd_loss'] == 'distance':
            self.teacher = MOVEModel(emb_size=16000,
                                     sum_method=4,
                                     final_activation=3)
            self.teacher.load_state_dict(
                torch.load(os.path.join(self.cfg['main_path'],
                                        'saved_models/model_move.pt'),
                           map_location='cpu'))
            self.teacher.to(self.device)
            self.teacher.eval()

        # creating the linear projection layer and loading the class centroids
        # this step is for the cluster-based KD training
        elif self.cfg['kd_loss'] == 'cluster':
            self.lp_layer = nn.Linear(in_features=16000,
                                      out_features=self.cfg['emb_size'],
                                      bias=False)
            self.lp_layer.to(self.device)
            self.centroids = torch.load(
                os.path.join(self.cfg['main_path'], 'data/centroids.pt'))

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        In the case of distance-based KD training, no additional parameters are given to the optimizer.
        In the case of cluster-based KD training, the parameters of the linear projection layer are updated,
        as well as the parameters of the student model.
        """
        # getting the parameters of the student model
        opt_params = list(self.model.parameters())

        # for the cluster-based KD training, append the parameters of
        # the linear projection layer for the optimizer
        if self.cfg['kd_loss'] == 'cluster':
            opt_params += list(self.lp_layer.parameters())

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
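KD_LOSS_DICT and the KD loss functions themselves are not part of this listing. As a purely hypothetical illustration of what a 'distance'-type KD term can look like (not necessarily this repository's implementation), the student can be trained to match the teacher's pairwise-distance structure, which also sidesteps the mismatch between the smaller student embeddings and the 16000-d teacher embeddings:

import torch
import torch.nn.functional as F

def distance_kd_loss(embs_s, embs_t):
    """Hypothetical KD term: match the pairwise distance matrices of student and teacher."""
    d_s = torch.cdist(embs_s, embs_s) / embs_s.shape[1] ** 0.5  # (batch, batch), student space
    d_t = torch.cdist(embs_t, embs_t) / embs_t.shape[1] ** 0.5  # (batch, batch), teacher space
    return F.mse_loss(d_s, d_t)

embs_s = torch.randn(8, 256)    # dummy student embeddings
embs_t = torch.randn(8, 16000)  # dummy teacher embeddings
print(distance_kd_loss(embs_s, embs_t).item())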
Example #6
    for audio_file in tqdm(audio_files):
        crema_feature = compute_features(audio_path=audio_file,
                                         params=params,
                                         feature=feature)
        idxs = np.arange(0, crema_feature.shape[0], 8)
        temp_tensor = torch.from_numpy(crema_feature[idxs].T)
        crema_feature_list.append(
            torch.cat((temp_tensor, temp_tensor))[:23].unsqueeze(0))

    test_set = FullSizeInstanceDataset(data=crema_feature_list)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    print("Initializing model")

    # initializing the model
    model = MOVEModel(emb_size=args.emb_size)

    # loading a pre-trained model
    model_name = os.path.join(args.main_path, 'saved_models',
                              '{}_models'.format(args.exp_type),
                              'model_{}.pt'.format(experiment_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model.to(device)

    remove_items = []

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode
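The reshaping idiom in the loop above (also used in Example #4) turns a (T, 12) crema-PCP matrix into a (1, 23, T//8) patch: frames are subsampled by a factor of 8, and the 12 pitch-class bins are stacked twice and truncated to 23 rows, matching the h = 23 input height that Example #7 uses for MOVEModel. A small self-contained check with dummy data:

import numpy as np
import torch

crema_feature = np.random.rand(1000, 12)               # dummy (T, 12) crema-PCP feature

idxs = np.arange(0, crema_feature.shape[0], 8)         # keep every 8th frame
temp_tensor = torch.from_numpy(crema_feature[idxs].T)  # (12, T//8)
patch = torch.cat((temp_tensor, temp_tensor))[:23].unsqueeze(0)  # (1, 23, T//8)

print(patch.shape)  # torch.Size([1, 23, 125])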
Example #7
def train(save_name,
          train_path,
          chunks,
          val_path,
          save_model,
          save_summary,
          seed,
          num_of_epochs,
          model_type,
          emb_size,
          sum_method,
          final_activation,
          lr,
          lrsch,
          lrsch_factor,
          momentum,
          patch_len,
          num_of_labels,
          ytc,
          data_aug,
          norm_dist,
          mining_strategy,
          margin
          ):
    """
    Main training function of MOVE. For a detailed explanation of parameters,
    please check 'python move_main.py --help'
    :param save_name: name to save model and experiment summary
    :param train_path: path of the training data
    :param chunks: how many chunks to use for the training data
    :param val_path: path of the validation data
    :param save_model: whether to save model (1) or not (0)
    :param save_summary: whether to save experiment summary (1) or not (0)
    :param seed: random seed
    :param num_of_epochs: number of epochs for training
    :param model_type: which model to use: MOVE (0) or MOVE without transposition invariance (1)
    :param emb_size: the size of the final embeddings produced by the model
    :param sum_method: the summarization method for the model
    :param final_activation: final activation to use for the model
    :param lr: value of learning rate
    :param lrsch: which learning rate scheduler to use
    :param lrsch_factor: the decrease rate of learning rate
    :param momentum: momentum for optimizer
    :param patch_len: number of frames for each song to be used in training
    :param num_of_labels: number of labels per mini-batch
    :param ytc: whether to exclude the songs overlapping with ytc for training
    :param data_aug: whether to use data augmentation
    :param norm_dist: whether to normalize squared euclidean distances with the embedding size
    :param mining_strategy: which mining strategy to use
    :param margin: the margin for the triplet loss
    """

    summary = dict()  # initializing the summary dict

    # initiating the necessary random seeds
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.cuda.manual_seed(seed)

    # initializing the model
    if model_type == 0:
        move_model = MOVEModel(emb_size=emb_size, sum_method=sum_method, final_activation=final_activation)
    elif model_type == 1:
        move_model = MOVEModelNT(emb_size=emb_size, sum_method=sum_method, final_activation=final_activation)
    else:
        raise Exception('Invalid number for the model parameter.')

    # sending the model to gpu, if available
    if torch.cuda.is_available():
        move_model.cuda()

    # initiating the optimizer
    optimizer = SGD(move_model.parameters(),
                    lr=lr,
                    momentum=momentum)

    # initializing the lists for tracking losses
    train_loss_log = []
    val_loss_log = []
    val_map_log = []

    # loading the training and validation data
    if chunks == 1:  # hack for handling 1 chunk for training data
        train_path = '{}_1.pt'.format(train_path)
    train_data, train_labels = import_dataset_from_pt('data/{}'.format(train_path),
                                                      chunks=chunks, model_type=model_type)
    print('Train data has been loaded!')

    val_data, val_labels = import_dataset_from_pt('data/{}'.format(val_path), chunks=1, model_type=model_type)
    print('Validation data has been loaded!')

    # selecting the H dimension of the input data
    # different models handle different size inputs
    if model_type == 0:
        h = 23
    else:
        h = 12

    # initializing the MOVE dataset objects and data loaders
    # we use the validation set to track two things: (1) triplet loss, (2) mean average precision
    # to check mean average precision on the full songs,
    # we need to define another dataset object and data loader for it
    train_set = MOVEDatasetFixed(train_data, train_labels, h=h, w=patch_len,
                                 data_aug=data_aug, ytc=ytc)
    train_loader = DataLoader(train_set, batch_size=num_of_labels, shuffle=True,
                              collate_fn=triplet_mining_collate, drop_last=True)

    val_set = MOVEDatasetFixed(val_data, val_labels, h=h, w=patch_len, data_aug=0)
    val_loader = DataLoader(val_set, batch_size=num_of_labels, shuffle=True,
                            collate_fn=triplet_mining_collate, drop_last=True)

    val_map_set = MOVEDatasetFull(val_data, val_labels)
    val_map_loader = DataLoader(val_map_set, batch_size=1, shuffle=False)

    # initializing the learning rate scheduler
    if lrsch == 0:
        pass
    else:
        if lrsch == 1:
            milestones = [80]
        else:
            milestones = [80, 100]
        lr_schedule = lr_scheduler.MultiStepLR(optimizer,
                                               milestones=milestones,
                                               gamma=lrsch_factor)

    # calculating the number of parameters of the model
    tmp = 0
    for p in move_model.parameters():
        tmp += np.prod(p.size())
    print('Num of parameters = {}'.format(int(tmp)))

    print('--- Training starts ---')
    print('Model name: {}'.format(save_name))

    start_time = time.monotonic()  # start time for tracking the duration of entire training

    # main training loop
    for epoch in range(num_of_epochs):
        last_epoch = epoch  # tracking last epoch to make sure that model didn't quit early

        start = time.monotonic()  # start time for the training loop
        train_loss = train_triplet_mining(move_model=move_model,
                                          optimizer=optimizer,
                                          train_loader=train_loader,
                                          margin=margin,
                                          norm_dist=norm_dist,
                                          mining_strategy=mining_strategy)
        print('Training loop: Epoch {} - Duration {:.2f} mins'.format(epoch, (time.monotonic()-start)/60))

        start = time.monotonic()  # start time for the validation loop
        val_loss = validate_triplet_mining(move_model=move_model,
                                           val_loader=val_loader,
                                           margin=margin,
                                           norm_dist=norm_dist,
                                           mining_strategy=mining_strategy)

        print('Validation loop: Epoch {} - Duration {:.2f} mins'.format(epoch, (time.monotonic()-start)/60))

        start = time.monotonic()  # start time for the mean average precision calculation

        # calculating the pairwise distances on validation set
        dist_map_matrix = test(move_model=move_model,
                               test_loader=val_map_loader).cpu()

        # calculating performance metrics
        # the average_precision function uses similarities, not distances
        # we multiply the distances by -1, and set the diagonal (self-similarity) to -inf
        val_map_score = average_precision(
            -1 * dist_map_matrix.float().clone() + torch.diag(torch.ones(len(val_data)) * float('-inf')),
            dataset=0)
        print('Test loop: Epoch {} - Duration {:.2f} mins'.format(epoch, (time.monotonic()-start)/60))

        # saving loss values for the summary
        train_loss_log.append(train_loss)
        val_loss_log.append(val_loss)
        val_map_log.append(val_map_score.item())

        # saving model if needed
        if save_model == 1:
            if not os.path.exists('saved_models/'):
                os.mkdir('saved_models/')
            torch.save(move_model.state_dict(), 'saved_models/model_{}.pt'.format(save_name))

        # printing the losses
        print('training_loss: {}'.format(train_loss))
        print('val_loss: {}'.format(val_loss))

        # activate learning rate scheduler if needed
        if lrsch != 0:
            lr_schedule.step()

        # dumping current loss values to the summary
        summary['train_loss_log'] = train_loss_log
        summary['val_loss_log'] = val_loss_log
        summary['val_map_log'] = val_map_log

        # save summary, if needed, after each epoch
        if save_summary == 1:
            if not os.path.exists('experiment_summaries/'):
                os.mkdir('experiment_summaries/')

            with open('experiment_summaries/summary_{}.json'.format(save_name), 'w') as log:
                json.dump(summary, log, indent='\t')

    end_time = time.monotonic()  # end time of the entire training loop

    # logging all code parameters in the summary file
    summary['save_name'] = save_name
    summary['train_path'] = train_path
    summary['chunks'] = chunks
    summary['val_path'] = val_path
    summary['save_model'] = save_model
    summary['save_summary'] = save_summary
    summary['random_seed'] = seed
    summary['num_of_epochs'] = num_of_epochs
    summary['model_type'] = model_type
    summary['emb_size'] = emb_size
    summary['sum_method'] = sum_method
    summary['final_activation'] = final_activation
    summary['learning_rate'] = lr
    summary['lr_schedule'] = lrsch
    summary['lrsch_factor'] = lrsch_factor
    summary['momentum'] = momentum
    summary['patch_len'] = patch_len
    summary['num_of_labels'] = num_of_labels
    summary['ytc_labels'] = ytc
    summary['data_aug'] = data_aug
    summary['norm_dist'] = norm_dist
    summary['mining_strategy'] = mining_strategy
    summary['margin'] = margin

    summary['last_epoch'] = last_epoch
    summary['training_time'] = end_time - start_time

    summary['train_loss_log'] = train_loss_log
    summary['val_loss_log'] = val_loss_log
    summary['val_map_log'] = val_map_log

    # saving the last version of the summary
    if save_summary == 1:
        if not os.path.exists('experiment_summaries/'):
            os.mkdir('experiment_summaries/')

        with open('experiment_summaries/summary_{}.json'.format(save_name), 'w') as log:
            json.dump(summary, log, indent='\t')

    # saving the last version of the model
    if save_model == 1:
        if not os.path.exists('saved_models/'):
            os.mkdir('saved_models/')
        torch.save(move_model.state_dict(), 'saved_models/model_{}.pt'.format(save_name))
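The lrsch option above boils down to a MultiStepLR schedule that multiplies the learning rate by lrsch_factor at fixed epoch milestones; a minimal standalone sketch of that behaviour (the stand-in layer, milestones and gamma below are illustrative values):

import torch
from torch.optim import SGD, lr_scheduler

model = torch.nn.Linear(23, 16)  # stand-in for the MOVE model
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)
schedule = lr_scheduler.MultiStepLR(optimizer, milestones=[80, 100], gamma=0.2)

for epoch in range(120):
    # ... training and validation for one epoch would run here ...
    optimizer.step()   # placeholder for the actual weight updates
    schedule.step()
    if epoch in (79, 80, 99, 100):
        print('epoch {} -> lr {}'.format(epoch, optimizer.param_groups[0]['lr']))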
Example #8
def evaluate(save_name, model_type, emb_size, sum_method, final_activation,
             dataset, dataset_name):
    """
    Main evaluation function of MOVE. For a detailed explanation of parameters,
    please check 'python move_main.py --help'
    :param save_name: name to save model and experiment summary
    :param model_type: which model to use: MOVE (0) or MOVE without transposition invariance (1)
    :param emb_size: the size of the final embeddings produced by the model
    :param sum_method: the summarization method for the model
    :param final_activation: final activation to use for the model
    :param dataset: which dataset to evaluate the model on. (0) validation set, (1) da-tacos, (2) ytc
    :param dataset_name: name of the file to evaluate
    """

    # indicating which dataset to use for evaluation
    # val_subset_crema is the name of our validation set
    if dataset_name == '':
        if dataset == 0:
            dataset_name = 'data/val_subset_crema.pt'
        elif dataset == 1:
            dataset_name = 'data/benchmark_crema.pt'
        else:
            dataset_name = 'data/ytc_crema.h5'
    else:
        dataset_name = 'data/{}'.format(dataset_name)

    print('Evaluating model {} on dataset {}.'.format(save_name, dataset_name))

    # initializing the model
    if model_type == 0:
        move_model = MOVEModel(emb_size=emb_size,
                               sum_method=sum_method,
                               final_activation=final_activation)
    elif model_type == 1:
        move_model = MOVEModelNT(emb_size=emb_size,
                                 sum_method=sum_method,
                                 final_activation=final_activation)

    # loading a pre-trained model
    model_name = 'saved_models/model_{}.pt'.format(save_name)

    move_model.load_state_dict(torch.load(model_name, map_location='cpu'))
    move_model.eval()

    # sending the model to gpu, if available
    if torch.cuda.is_available():
        move_model.cuda()

    # loading test data, initializing the dataset object and the data loader
    test_data, test_labels = import_dataset_from_pt(filename=dataset_name)
    test_map_set = MOVEDatasetFull(data=test_data, labels=test_labels)
    test_map_loader = DataLoader(test_map_set, batch_size=1, shuffle=False)

    # calculating the pairwise distances
    dist_map_matrix = test(move_model=move_model,
                           test_loader=test_map_loader).cpu()

    # calculating the performance metrics
    average_precision(-1 * dist_map_matrix.clone() +
                      torch.diag(torch.ones(len(test_data)) * float('-inf')),
                      dataset=dataset)
Example #9
def evaluate(exp_name,
             exp_type,
             main_path,
             emb_size,
             loss,
             data_dir):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    print('Evaluating model {}.'.format(exp_name))

    file_list = enumerate_h5_files(data_dir)
    file_list.sort(key=lambda x: os.path.splitext(os.path.basename(x))[0])
    print("Number feature files: {}".format(len(file_list)))

    data = []
    name = list(map(lambda x: os.path.splitext(os.path.relpath(x, data_dir))[0], file_list))
    print("name: {}".format(name))
    #image_with_index_list = dict(zip(name, range(len(name))))
    #print("image_with_index_list: {}".format(image_with_index_list))

    for file in tqdm(file_list):
        temp_crema = dd.io.load(file)["crema"]
        #print("crema shape: {}".format(temp_crema.shape))
        idxs = np.arange(0, temp_crema.shape[0], 8)

        temp_tensor = torch.from_numpy(temp_crema[idxs].T)

        data.append(torch.cat((temp_tensor, temp_tensor))[:23].unsqueeze(0))
        #name.append(os.path.splitext(os.path.basename(file))[0])

    test_set = FullSizeInstanceDataset(data=data)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    print("Initializing model")

    # initializing the model
    model = MOVEModel(emb_size=emb_size)

    # loading a pre-trained model
    model_name = os.path.join(main_path, 'saved_models', '{}_models'.format(exp_type), 'model_{}.pt'.format(exp_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    model.to(device)

    remove_items = []

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in tqdm(enumerate(test_loader)):
            try:
                # sending the items to the proper device
                item = handle_device(item, device)

                # forward pass of the model
                # obtaining the embeddings of each item in the batch
                emb = model(item)

                # appending the current embedding to the collection of embeddings
                embed_all = torch.cat((embed_all, emb))
            except Exception as e:
                print("Error: {}, input shape: {}, index".format(e, item.shape, batch_idx))
                remove_items.append(name[batch_idx])
                continue
        for re_item in remove_items:
            name.remove(re_item)
            print("name length: {}".format(len(name)))
        image_with_index_list = dict(zip(name, range(len(name))))

        embed_all = F.normalize(embed_all, p=2, dim=1)

    return embed_all.cpu(), image_with_index_list
Example #10
class LSRTrainer(BaseTrainer):
    """
    Trainer object for Latent Space Reconfiguration experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # for the first epoch, only the linear layer is trained.
        # starting from the second epoch, all the parameters of the model are trained.
        if self.current_epoch == 1:
            for param in self.model.parameters():
                param.requires_grad = True

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, only the same batch is trained.
            # this helps to see whether there are any issues with optimization.
            # a fast over-fitting behaviour is expected.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = LOSS_DICT[self.cfg['loss']](
                data=embs,
                labels=labels,
                emb_size=self.model.fin_emb_size,
                proxies=self.proxies,
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating and loading the learned parameters of the MOVE model
        # this model stands as our base model
        self.model = MOVEModel(emb_size=16000,
                               sum_method=4,
                               final_activation=3)
        self.model.load_state_dict(
            torch.load(os.path.join(self.cfg['main_path'],
                                    'saved_models/model_move.pt'),
                       map_location='cpu'))

        # freezing all the parameters of the base model
        for param in self.model.parameters():
            param.requires_grad = False

        # creating a new linear layer and a new batch normalization layer
        self.model.lin1 = torch.nn.Linear(in_features=256,
                                          out_features=self.cfg['emb_size'],
                                          bias=False)
        self.model.lin_bn = torch.nn.BatchNorm1d(self.cfg['emb_size'],
                                                 affine=False)

        # setting the embedding size of the model
        self.model.fin_emb_size = self.cfg['emb_size']

        # sending the model to the proper device
        self.model.to(self.device)

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer. For LSR training, we have two types of parameters.
        'new_param' are the ones from the new linear layer,
        and 'finetune_param' are the ones from the 'feature extractor' part of the MOVE model.
        By distinguishing them, we can set different learning rates for each parameter group.
        """
        # getting parameter groups as explained above
        param_list = ['lin1.weight', 'lin1.bias']
        new_param = [
            par[1] for par in self.model.named_parameters()
            if par[0] in param_list
        ]
        finetune_param = [
            par[1] for par in self.model.named_parameters()
            if par[0] not in param_list
        ]

        # initializing proxies if a proxy-based loss is used
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            self.proxies = torch.nn.Parameter(
                torch.randn(14499,
                            self.cfg['emb_size'],
                            requires_grad=True,
                            device=self.device))
            new_param.append(self.proxies)

        # setting the proper learning rates and initializing the optimizer
        opt_params = [{
            'params': finetune_param,
            'lr': self.cfg['finetune_learning_rate']
        }, {
            'params': new_param
        }]

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
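create_optimizer above relies on PyTorch parameter groups so that the pretrained backbone can be fine-tuned with a smaller learning rate than the freshly initialized linear layer; a minimal standalone sketch of that mechanism (the modules and learning rates are illustrative):

import torch

backbone = torch.nn.Linear(23, 256)               # stand-in for the pretrained part of MOVE
new_head = torch.nn.Linear(256, 128, bias=False)  # stand-in for the new lin1 layer

opt_params = [
    {'params': backbone.parameters(), 'lr': 1e-5},  # fine-tuning group with its own lr
    {'params': new_head.parameters()}               # falls back to the optimizer default lr
]
optimizer = torch.optim.SGD(opt_params, lr=1e-2, momentum=0.9)

for group in optimizer.param_groups:
    print(group['lr'])  # 1e-05 for the backbone group, 0.01 for the new head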
Example #11
def evaluate(exp_name, exp_type, main_path, emb_size, loss):
    """
    Main evaluation function of MOVE. For a detailed explanation of parameters,
    please check 'python move_main.py --help'
    :param main_path: main working directory
    :param exp_name: name to save model and experiment summary
    :param exp_type: type of experiment
    :param emb_size: the size of the final embeddings produced by the model
    :param loss: the loss used for training the model
    """

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    eval_dataset = os.path.join(main_path, 'data/benchmark_crema.pt')

    print('Evaluating model {} on dataset {}.'.format(exp_name, eval_dataset))

    # initializing the model
    model = MOVEModel(emb_size=emb_size)

    # loading a pre-trained model
    model_name = os.path.join(main_path, 'saved_models',
                              '{}_models'.format(exp_type),
                              'model_{}.pt'.format(exp_name))
    model.load_state_dict(torch.load(model_name, map_location='cpu'))

    # sending the model to gpu, if available
    model.to(device)

    # loading test data, initializing the dataset object and the data loader
    test_data, test_labels = import_dataset_from_pt(filename=eval_dataset,
                                                    suffix=False)
    test_set = FullSizeInstanceDataset(data=test_data)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

    start_time = time.monotonic()

    with torch.no_grad():  # disabling gradient tracking
        model.eval()  # setting the model to evaluation mode

        # initializing an empty tensor for storing the embeddings
        embed_all = torch.tensor([], device=device)

        # iterating through the data loader
        for batch_idx, item in enumerate(test_loader):
            # sending the items to the proper device
            item = handle_device(item, device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            emb = model(item)

            # appending the current embedding to the collection of embeddings
            embed_all = torch.cat((embed_all, emb))

        # if Triplet or ProxyNCA loss is used, the distance function is Euclidean distance
        if loss in [0, 1]:
            dist_all = pairwise_euclidean_distance(embed_all)
            dist_all /= model.fin_emb_size
        # if NormalizedSoftmax loss is used, the distance function is cosine distance
        elif loss == 2:
            dist_all = -1 * pairwise_cosine_similarity(embed_all)
        # if Group loss is used, the distance function is Pearson correlation coefficient
        else:
            dist_all = -1 * pairwise_pearson_coef(embed_all)

    # computing evaluation metrics from the obtained distances
    average_precision(-1 * dist_all.cpu().float().clone() +
                      torch.diag(torch.ones(len(test_data)) * float('-inf')),
                      dataset=1)

    test_time = time.monotonic() - start_time

    print('Total time: {:.0f}m{:.0f}s.'.format(test_time // 60,
                                               test_time % 60))
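average_precision itself is not included in this listing; what every caller above does before invoking it is convert distances to similarities by negation and fill the diagonal with -inf so that a track can never retrieve itself. A small reproduction of that masking step with plain torch:

import torch

embed_all = torch.randn(5, 256)               # dummy embeddings for 5 tracks

dist_all = torch.cdist(embed_all, embed_all)  # pairwise Euclidean distances
sim_all = -1 * dist_all.clone() + torch.diag(torch.ones(len(embed_all)) * float('-inf'))

print(sim_all.diagonal())     # all -inf: self-similarity is masked out
print(sim_all.argmax(dim=1))  # index of the most similar *other* track per row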
Example #12
class MOVETrainer(BaseTrainer):
    """
    Trainer object for baseline experiments with MOVE.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, only the same batch is trained.
            # this helps to see whether there are any issues with optimization.
            # a fast over-fitting behaviour is expected.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = LOSS_DICT[self.cfg['loss']](
                data=embs,
                labels=labels,
                emb_size=self.model.fin_emb_size,
                proxies=self.proxies,
                margin=self.cfg['margin'],
                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the model and sending it to the proper device
        self.model = MOVEModel(emb_size=self.cfg['emb_size'])
        self.model.to(self.device)

        # computing and printing the total number of parameters of the model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        """
        # parameters to train
        opt_params = list(self.model.parameters())

        # initializing proxies if a proxy-based loss is used
        self.proxies = None
        if self.cfg['loss'] in [1, 2, 3]:
            self.proxies = torch.nn.Parameter(
                torch.randn(14499,
                            self.cfg['emb_size'],
                            requires_grad=True,
                            device=self.device))
            opt_params.append(self.proxies)

        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(opt_params,
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(opt_params, lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
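For the proxy-based losses above, the proxies are an ordinary torch.nn.Parameter appended to the optimizer's parameter list, so they are updated together with the model weights; a minimal sketch with a dummy loss (the sizes and the loss itself are illustrative, not the repository's proxy loss):

import torch

emb_size = 128
model = torch.nn.Linear(23, emb_size)                    # stand-in for MOVEModel
proxies = torch.nn.Parameter(torch.randn(10, emb_size))  # one proxy per (dummy) class

opt_params = list(model.parameters())
opt_params.append(proxies)
optimizer = torch.optim.SGD(opt_params, lr=0.1)

embs = model(torch.randn(4, 23))                                 # dummy batch of embeddings
loss = (embs.unsqueeze(1) - proxies.unsqueeze(0)).pow(2).mean()  # dummy proxy-style loss
optimizer.zero_grad()
loss.backward()
optimizer.step()  # both the model weights and the proxies are updated

print(proxies.grad.abs().sum().item() > 0)  # True: the proxies received gradients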
Example #13
class PruningTrainer(BaseTrainer):
    """
    Trainer object for Pruning experiments.
    """
    def __init__(self, cfg, experiment_name):
        """
        Initializing the trainer
        :param cfg: dictionary that holds the config hyper-parameters
        :param experiment_name: name of the experiment
        """
        # initializing the parent Trainer object
        super().__init__(cfg, experiment_name)

    def train(self, save_logs=True):
        """
        Main training function for Pruning experiments.
        It overrides the training function of the BaseTrainer to add
        pruning-related functionality.
        :param save_logs: whether to save training and validation loss logs
        """
        # save the initial parameters of the model for other pruning iterations
        torch.save(
            self.model.state_dict(),
            os.path.join(self.cfg['main_path'], 'saved_models',
                         'pruning_models',
                         'model_{}_initial.pt'.format(self.experiment_name)))

        # iterating full-training cycles for pruning
        for prune_iteration in range(self.cfg['pruning_iterations'] + 1):
            self.prune_iteration = prune_iteration

            # loading the initial parameters of the model
            if prune_iteration > 0:
                self.model.load_state_dict(
                    torch.load(
                        os.path.join(
                            self.cfg['main_path'], 'saved_models',
                            'pruning_models', 'model_{}_initial.pt'.format(
                                self.experiment_name))))

                # resetting the learning rate
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.cfg['learning_rate']

                # re-creating the learning rate schedule for the new training cycle
                self.create_lr_scheduler()

            # execute a full training cycle
            super().train(save_logs=False)

            # selecting which indices of the linear layer to prune
            # based on the trained model
            self.select_indices_to_prune()

        if save_logs:
            with open(
                    './experiment_logs/{}_logs/{}.json'.format(
                        self.cfg['exp_type'], self.experiment_name), 'w') as f:
                json.dump(
                    {
                        'train_loss_log': self.train_loss_log,
                        'val_loss_log': self.val_loss_log
                    }, f)

    def handle_training_batches(self):
        """
        Training loop for one mini-epoch.
        :return: training loss for the current mini-epoch
        """
        # setting the model to training mode
        self.model.train()

        # initializing a list object to hold losses from each iteration
        epoch_loss = []

        # training loop
        for batch_idx, batch in enumerate(self.data_loader):
            # if overfit_batch == 1, only the same batch is trained.
            # this helps to see whether there are any issues with optimization.
            # a fast over-fitting behaviour is expected.
            if self.cfg['overfit_batch'] == 1:
                if batch_idx == 0:
                    overfit_batch = batch
                else:
                    batch = overfit_batch

            # making sure the data and labels are on the correct device and in float32 type
            items, labels = batch
            items = handle_device(items, self.device)
            labels = handle_device(labels, self.device)

            # forward pass of the model
            # obtaining the embeddings of each item in the batch
            embs = self.model(items)

            # calculating the loss value for the iteration
            loss = triplet_loss(data=embs,
                                labels=labels,
                                emb_size=self.cfg['emb_size'],
                                margin=self.cfg['margin'],
                                mining_strategy=self.cfg['mining_strategy'])

            # setting gradients of the optimizer to zero
            self.optimizer.zero_grad()

            # calculating gradients with backpropagation
            loss.backward()

            # updating the weights
            self.optimizer.step()

            # applying the zero-mask to the selected indices
            if self.prune_iteration > 0:
                self.apply_mask()

            # logging the loss value of the current batch
            epoch_loss.append(loss.detach().item())

        # logging the loss value of the current mini-epoch
        return np.mean(epoch_loss)

    def apply_mask(self):
        """
        Applying the mask tensor to the linear layer to 'prune' weights.
        """
        self.model.lin1.weight.data = self.model.lin1.weight.data * self.mask
        self.model.fin_emb_size = (self.model.lin1.weight.shape[0] -
                                   NUM_OF_ROWS_TO_PRUNE[self.prune_iteration])

    def select_indices_to_prune(self):
        """
        Selecting which indices to prune based on the trained model.
        :return:
        """
        self.indices_to_prune = torch.topk(
            torch.abs(self.model.lin1.weight).mean(dim=1),
            k=NUM_OF_ROWS_TO_PRUNE[self.prune_iteration],
            largest=False).indices

        # creating a mask of ones and zeros
        mask = torch.ones(self.model.lin1.weight.shape)
        zero_row = torch.zeros(1, self.model.lin1.weight.shape[1])

        # sending the tensors to the proper device
        mask = handle_device(mask, self.device)
        zero_row = handle_device(zero_row, self.device)

        # finalizing the mask based on the selected indices
        mask[self.indices_to_prune] = zero_row
        self.mask = mask

    def create_model(self):
        """
        Initializing the model to optimize.
        """
        # creating the model and sending it to the proper device
        self.model = MOVEModel(emb_size=16000)
        self.model.to(self.device)

        # computing and printing the total number of parameters of the new model
        self.num_params = 0
        for param in self.model.parameters():
            self.num_params += np.prod(param.size())
        print('Total number of parameters for the model: {:.0f}'.format(
            self.num_params))

    def create_optimizer(self):
        """
        Initializing the optimizer.
        """
        if self.cfg['optimizer'] == 0:
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             lr=self.cfg['learning_rate'],
                                             momentum=self.cfg['momentum'])
        elif self.cfg['optimizer'] == 1:
            self.optimizer = Ranger(self.model.parameters(),
                                    lr=self.cfg['learning_rate'])
        else:
            self.optimizer = None
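select_indices_to_prune and apply_mask above implement magnitude-based row pruning of the final linear layer: the rows of lin1.weight with the smallest mean absolute weight are zeroed through a multiplicative mask that is re-applied after every optimizer step. A small self-contained sketch of that selection and masking (layer size and the number of pruned rows are illustrative):

import torch

lin1 = torch.nn.Linear(in_features=256, out_features=16, bias=False)  # stand-in for model.lin1
num_rows_to_prune = 4

# selecting the rows with the smallest mean absolute weight
indices_to_prune = torch.topk(torch.abs(lin1.weight).mean(dim=1),
                              k=num_rows_to_prune,
                              largest=False).indices

# building a mask of ones with zeroed rows at the selected indices
mask = torch.ones(lin1.weight.shape)
mask[indices_to_prune] = torch.zeros(1, lin1.weight.shape[1])

# applying the mask, as apply_mask() does after each weight update
lin1.weight.data = lin1.weight.data * mask
print((lin1.weight.abs().sum(dim=1) == 0).sum().item())  # 4 rows are now exactly zero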