def calc_loss(scores, qd_labels, sigma):
    """Vectorized RankNet loss over all document pairs of a single query."""
    # Skip 1-document queries: no pairs, no loss
    if len(scores) < 2:
        return 0, 0

    # TODO: refactor this tensor/numpy round-trip
    ranking_boi = np.float64(scores.squeeze().cpu().detach().numpy())
    # Get the ranking
    if not isinstance(ranking_boi, np.ndarray):
        ranking_boi = np.array([ranking_boi])
    ranking, inv_ranking = rnk.rank_and_invert(ranking_boi)
    ranking = torch.tensor(ranking.copy()).cuda()

    # Vectorized loss: all pairwise score differences s_i - s_j
    # ((n, 1) - (1, n) broadcasts to (n, n))
    scorediff = scores - scores.T
    # S_ij in {-1, 0, 1}: sign of the label difference for every pair
    squeeze_labels = qd_labels.unsqueeze(-1)
    signs = torch.sign(squeeze_labels - qd_labels).float()

    # RankNet cost: C_ij = (1/2)(1 - S_ij) * sigma * (s_i - s_j)
    #                      + log(1 + exp(-sigma * (s_i - s_j)))
    loss = (1 / 2) * (1 - signs) * sigma * scorediff \
        + torch.log(1 + torch.exp(-1 * sigma * scorediff))
    loss = loss.sum()
    return loss, ranking
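# A minimal sketch of driving calc_loss from a training loop. `model`,
# `optimizer` and `data` are assumed to follow the same interfaces as in
# run_epoch below; the sigma value is illustrative.
for qid in np.arange(data.train.num_queries()):
    optimizer.zero_grad()
    qd_feats = data.train.query_feat(qid)
    qd_labels = torch.tensor(data.train.query_labels(qid)).cuda()
    scores = model.forward(torch.tensor(qd_feats).float().cuda())
    loss, ranking = calc_loss(scores, qd_labels, sigma=1)
    if torch.is_tensor(loss):  # calc_loss returns (0, 0) for 1-doc queries
        loss.backward()
        optimizer.step()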
def validate_ndcg(validation_set):
    """Average NDCG over the validation set, computed without gradients."""
    # evaluate_model already loops over every query (skipping 1-document
    # queries) and returns the mean NDCG, so one call is enough; a per-query
    # loop here would re-evaluate the entire set on every iteration.
    with torch.no_grad():
        return evaluate_model(model, validation_set, regression=True)
def get_IRM(pred_scores, true_scores):
    """
    Get an array with all delta_IRM(i, j) for all document pairs of a query.
    """
    final_irm = []
    # When all relevances are 0, we set all delta IRM values to 1
    if sum(true_scores) != 0:
        true_ranking, _ = rnk.rank_and_invert(true_scores)
        before_irm = get_score(pred_scores, true_ranking)
        delta_irm = []
        prev_id = 0
        # Loop over all ordered pairs of documents, grouped by the first index
        for (curr_id, _), (compare_id, _) in itertools.permutations(
                enumerate(pred_scores), r=2):
            swap_scores = list(pred_scores)
            if curr_id == prev_id:
                delta_ij = calculate_delta(swap_scores, curr_id, compare_id,
                                           true_ranking, before_irm)
                delta_irm.append(delta_ij)
            else:
                # First pair of a new row: flush the finished row
                final_irm.append(delta_irm)
                delta_ij = calculate_delta(swap_scores, curr_id, compare_id,
                                           true_ranking, before_irm)
                delta_irm = [delta_ij]
                prev_id = curr_id
        final_irm.append(delta_irm)
    else:
        final_irm = np.ones((len(true_scores), len(true_scores) - 1))
    return final_irm
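# A small usage sketch for get_IRM; the scores and labels are made up.
# delta_irm[i][j] approximates the change in the ranking measure when
# documents i and j swap positions. Note the O(n^2) calls to
# calculate_delta, each of which re-scores a full ranking.
pred_scores = np.array([0.9, 0.1, 0.5])
true_scores = np.array([2., 0., 1.])
delta_irm = get_IRM(pred_scores, true_scores)  # 3 rows of 2 deltas each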
def evaluate_model(model, validation_set, k=5, regression=False):
    """
    Compute mean NDCG of `model` over `validation_set`.

    model: neural network model that we are evaluating
    validation_set: pointer to the validation set
    k: cutoff used to calculate NDCG (overridden below to the full ranking length)
    regression: whether the model was trained with regression or classification;
        this changes the way we feed the scores to the NDCG function
    """
    total_ndcg = 0
    # Main loop over all queries in the validation set
    for qid in np.arange(validation_set.num_queries()):
        # Get features, prediction scores and labels
        qd_feats = validation_set.query_feat(qid)
        qd_labels = validation_set.query_labels(qid)
        scores = model.forward(torch.tensor(qd_feats).cuda().float())

        # Bring the scores into the right format for regression or classification
        if regression:
            prediction = scores.squeeze(1).detach().cpu()
        else:
            softmax = F.softmax(scores, dim=1)
            prediction = torch.argmax(softmax, dim=1).detach().cpu()

        # Rank documents by predicted score, then read off their true labels
        pred_rank, _ = rnk.rank_and_invert(prediction)
        sorted_pred = np.array([qd_labels[idx] for idx in pred_rank])
        # Ideal ordering: labels sorted from most to least relevant
        label_rank = np.array(sorted(qd_labels, reverse=True))

        # Skip 1-document queries and all-zero-label queries
        # (the latter would divide by an ideal DCG of zero)
        if len(sorted_pred) > 1 and np.count_nonzero(qd_labels) != 0:
            # Setting k to len(sorted_pred) computes NDCG over the entire ranking
            k = len(sorted_pred)
            total_ndcg += ndcg_at_k(sorted_pred, label_rank, k)
    return total_ndcg / validation_set.num_queries()
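# A minimal usage sketch, reusing the `data` object from the training code
# below; the choice of the validation split is illustrative.
val_ndcg = evaluate_model(model, data.validation, regression=True)
print('Validation NDCG: %0.4f' % val_ndcg)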
def get_score(pred_scores, true_ranking):
    """
    Get the NDCG or ERR score of the predicted scores vs. the true ranking.
    """
    pred_ranking, _ = rnk.rank_and_invert(pred_scores)
    if config.eval_metric == 'ndcg':
        score = evl.ndcg_at_k(pred_ranking, true_ranking, 0)
    elif config.eval_metric == 'err':
        score = evl.ERR(pred_ranking, true_ranking)
    else:
        raise ValueError('Evaluation metric should be either ndcg or err.')
    return score
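# A small usage sketch; the scores and relevance labels are made up, and
# config.eval_metric is assumed to be set elsewhere in the repo.
pred_scores = np.array([0.2, 0.9, 0.4])
true_ranking, _ = rnk.rank_and_invert(np.array([0., 2., 1.]))
score = get_score(pred_scores, true_ranking)  # NDCG or ERR, per config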
import dataset
import ranking as rnk
import numpy as np

# We will rank 5 items with the following scores:
scores = np.array([10., 8., 12., 9., 5.])
ranking, inverted_ranking = rnk.rank_and_invert(scores)

print('Ranking the scores: %s' % scores)
print('Resulting ranking: %s' % ranking)
print('This orders the scores as: %s' % scores[ranking])
print('and results in the inverted ranking: %s' % inverted_ranking)
print('The inverted ranking allows us to quickly see that:')
for i in range(scores.shape[0]):
    print('Item %d with score %0.02f has rank: %d'
          % (i, scores[i], inverted_ranking[i]))
print('It is also very useful for computing rank differences,')
print('for instance:')
for i, j in [(1, 2), (2, 4), (0, 3)]:
    print('the difference between item %d (at rank %d)'
          ' and item %d (at rank %d) is %d'
          % (i, inverted_ranking[i], j, inverted_ranking[j],
             inverted_ranking[i] - inverted_ranking[j]))
print('Number of documents in test set: %d' % data.test.num_docs())

# Initialize a random model
random_model = np.random.uniform(size=data.num_features)
# One score for every document (1-d vector in the ordering of the dataset)
all_scores = np.dot(data.train.feature_matrix, random_model)
# Rank every query for all scores (1-d vector ordered by query ordering in the dataset)
all_rankings, all_inverted_rankings = rnk.data_split_rank_and_invert(all_scores, data.train)

qid = 1
s_i, e_i = data.train.query_range(qid)
# To rank only a single query, use rank_and_invert
query_ranking, query_inverted_ranking = rnk.rank_and_invert(all_scores[s_i:e_i])
assert np.all(np.equal(query_ranking, all_rankings[s_i:e_i]))
assert np.all(np.equal(query_inverted_ranking, all_inverted_rankings[s_i:e_i]))

print('-------')
print('Looking at query with id: %d' % qid)
print('Number of documents in query %d: %d' % (qid, data.train.query_size(qid)))
print('Scores for query %d: %s' % (qid, all_scores[s_i:e_i]))
print('Ranking for query %d: %s' % (qid, all_rankings[s_i:e_i]))
print('Inverted ranking for query %d: %s' % (qid, all_inverted_rankings[s_i:e_i]))

validation_scores = np.dot(data.validation.feature_matrix, random_model)
print('------')
print('Evaluation on entire validation partition.')
results = evl.evaluate(data.validation, validation_scores, print_results=True)
def run_epoch(model, optimizer, data, eval_every=10000, sigma=1, IRM='ndcg'):
    # Running loss totals
    temp_loss = 0
    overall_loss = 0

    # Main pairwise LambdaRank loop over all training queries
    for i, qid in enumerate(np.arange(data.train.num_queries())):
        # Zero the gradient buffer and get query-document features, labels and scores
        optimizer.zero_grad()
        qd_feats = data.train.query_feat(qid)
        qd_labels = data.train.query_labels(qid)
        scores = model.forward(torch.tensor(qd_feats).float().cuda())

        # Move features and labels onto the GPU as tensors
        qd_feats = torch.tensor(qd_feats).cuda()
        qd_labels = torch.tensor(qd_labels).cuda()

        # TODO: refactor this tensor/numpy round-trip
        scores_d = scores.detach()
        ranking_boi = np.float64(scores.squeeze().cpu().detach().numpy())
        # Get the ranking
        if not isinstance(ranking_boi, np.ndarray):
            ranking_boi = np.array([ranking_boi])
        ranking, inv_ranking = rnk.rank_and_invert(ranking_boi)
        ranking = torch.tensor(ranking.copy()).cuda()

        # Skip 1-document queries: no pairs to compare
        if len(scores) < 2:
            continue

        # All pairwise score differences, computed on detached scores: the
        # lambdas act as constants, so gradients only flow through the final
        # scores * lambdas product below
        scorediff = scores_d - scores_d.T
        # S_ij in {-1, 0, 1}: sign of the label difference for every pair
        squeeze_labels = qd_labels.unsqueeze(-1)
        signs = torch.sign(squeeze_labels - qd_labels).float()

        # LambdaRank lambdas:
        # lambda_ij = sigma * ((1/2)(1 - S_ij) - 1 / (1 + e^{sigma (s_i - s_j)}))
        lambdas_ij = sigma * ((1 / 2) * (1 - signs)
                              - (1 / (1 + torch.exp(sigma * scorediff))))

        # Get labels and ranking as numpy arrays
        np_labels = qd_labels.cpu().numpy()
        np_ranking = ranking.cpu().numpy()

        # Initialize the permutations list with the unswapped ranking
        pred_perms = []
        sorted_pred = np.array([np_labels[idx] for idx in np_ranking])
        pred_perms.append(sorted_pred)

        # Add every ranking that results from swapping one pair of documents
        for p in range(len(sorted_pred)):
            for q in range(p + 1, len(sorted_pred)):
                perm_pred = sorted_pred.copy()
                perm_pred[p], perm_pred[q] = sorted_pred[q], sorted_pred[p]
                pred_perms.append(perm_pred)
        pred_perms = np.array(pred_perms)

        # Score every permutation with the chosen ranking measure
        if IRM == 'err':
            ranking_measure = err(pred_perms)
        elif IRM == 'ndcg':
            label_rank = np.sort(np_labels)[::-1]
            ranking_measure = ndcg(pred_perms, label_rank)

        # |delta IRM|: change in the measure caused by each pairwise swap,
        # measured against the unswapped ranking (index 0)
        deltas = np.abs(ranking_measure[0] - ranking_measure[1:])
        delta_irm = np.zeros((len(ranking), len(ranking)))
        delta_irm[np.triu_indices(len(ranking), 1)] = deltas
        delta_irm = torch.from_numpy(delta_irm - delta_irm.T).float().cuda()

        # Scale the lambdas by |delta IRM| and sum over j for per-document lambdas
        lambdas_ij = lambdas_ij * delta_irm
        lambdas_i = lambdas_ij.sum(dim=1)
        loss = (scores.squeeze() * lambdas_i).sum()

        # Keep track of a rolling average (.item() avoids retaining the graph)
        rolling_avg = (loss / (len(ranking) ** 2)).item()
        overall_loss += rolling_avg
        temp_loss += rolling_avg
        if (i + 1) % eval_every == 0:
            avg_ndcg = evaluate_model(model, data.validation, regression=True)
            print("NDCG: ", avg_ndcg, 'Loss: ', temp_loss / -eval_every)
            temp_loss = 0

        # Update gradients
        loss.backward()
        optimizer.step()

    print("NDCG: ", evaluate_model(model, data.validation, regression=True))
    print("epoch_loss: ", overall_loss / data.train.num_queries())
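# A hypothetical end-to-end training driver for run_epoch. The network
# architecture, learning rate and epoch count below are illustrative
# assumptions, not taken from the original code.
model = torch.nn.Sequential(
    torch.nn.Linear(data.num_features, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 1),  # one relevance score per document
).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(5):
    run_epoch(model, optimizer, data, eval_every=10000, sigma=1, IRM='ndcg')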