def evaluate_test_performance(score_fn: nn.Module, test: SVMRankDataset) -> float:
    """Evaluate the model's ranking quality on the test data with nDCG@10,
    using the true preference levels (gamma) as the relevance target."""
    loader = DataLoader(
        test, batch_size=1024, shuffle=False, collate_fn=test.collate_fn()
    )
    total_ndcg = 0.0
    for batch in loader:
        # Convert observed relevance labels to the true preference gamma.
        gamma = convert_rel_to_gamma(relevance=batch.relevance)
        scores = score_fn(batch.features)
        total_ndcg += ndcg(scores, gamma, batch.n, k=10, exp=False).sum()
    # Average over all test queries.
    return float(total_ndcg / len(test))
def test_collate_sparse_3():
    """Collating three sparse samples with max_list_size=3 must produce a
    (3, 3, 45) feature tensor."""
    dataset = get_sample_dataset(sparse=True)
    # Batch of three samples, collated with the list size capped at 3.
    samples = [dataset[i] for i in range(3)]
    collate = SVMRankDataset.collate_fn(UniformSampler(max_list_size=3))
    collated = collate(samples)
    assert collated.features.shape == (3, 3, 45)
def test_collate_dense_all():
    """Collating three dense samples with an unbounded list size must produce
    a (3, 14, 45) feature tensor."""
    dataset = get_sample_dataset(sparse=False)
    # Batch of three samples, collated with no cap on the list size.
    samples = [dataset[i] for i in range(3)]
    collate = SVMRankDataset.collate_fn(UniformSampler(max_list_size=None))
    collated = collate(samples)
    assert collated.features.shape == (3, 14, 45)
def test_basic_sgd_learning():
    """Training with SGD and pairwise hinge loss should substantially lower
    the average relevant position (ARP) over 100 epochs."""
    torch.manual_seed(42)
    dataset = get_sample_dataset()
    input_dim = dataset[0].features.shape[1]
    collate_fn = SVMRankDataset.collate_fn(UniformSampler(max_list_size=50))
    model = Model(input_dim)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    loss_fn = PairwiseHingeLoss()
    n_epochs = 100
    arp_per_epoch = torch.zeros(n_epochs)
    for epoch in range(n_epochs):
        running_arp = 0.0
        # Fresh loader each epoch so the data is reshuffled.
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=2, shuffle=True, collate_fn=collate_fn
        )
        for step, batch in enumerate(loader):
            xs, ys, n = batch.features, batch.relevance, batch.n
            # One SGD step on the mean pairwise hinge loss.
            loss = loss_fn(model(xs), ys, n).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Measure ARP on the current batch in eval mode.
            model.eval()
            batch_arp = torch.mean(arp(model(xs), ys, n))
            model.train()
            # Incremental running mean of ARP within this epoch.
            running_arp = running_arp + (float(batch_arp) - running_arp) / (step + 1)
        arp_per_epoch[epoch] = running_arp
    # ARP must have dropped by at least 0.40 from first to last epoch.
    assert arp_per_epoch[-1] - arp_per_epoch[0] <= -0.40
def evaluate_test_performance(score_fn: nn.Module, test: SVMRankDataset, objective: str) -> float:
    """Evaluate the scoring function's ranking quality on the test data with
    nDCG@10, targeting the expected outcome implied by the chosen objective."""
    loader = DataLoader(
        test, batch_size=1024, shuffle=False, collate_fn=test.collate_fn()
    )
    total_ndcg = 0.0
    for batch in loader:
        mu = convert_rel_to_mu(batch.relevance)[0]
        mu_zero = convert_rel_to_mu_zero(batch.relevance)[0]
        # 'via-rec' targets the outcome under recommendation; otherwise the
        # target is the causal lift (mu - mu_zero) over the whole platform.
        target = mu if objective == "via-rec" else mu - mu_zero
        scores = score_fn(batch.features)
        total_ndcg += ndcg(scores, target, batch.n, k=10, exp=False).sum()
    # Average over all test queries.
    return float(total_ndcg / len(test))
def get_sample_dataset(*args, **kwargs):
    """Load the bundled sample dataset; all arguments are forwarded to
    `SVMRankDataset` (same signature as `svmranking_dataset`)."""
    return SVMRankDataset("tests/datasets/resources/dataset.txt", *args, **kwargs)
def train_ranker(
    score_fn: nn.Module,
    optimizer: optim,
    estimator: str,
    objective: str,
    train: SVMRankDataset,
    test: SVMRankDataset,
    batch_size: int = 32,
    n_epochs: int = 30,
) -> List:
    """Train a ranking model and track its test nDCG@10 per epoch.

    Parameters
    ----------
    score_fn: nn.Module
        Scoring function to train.
    optimizer: optim
        Parameter-optimization algorithm.
    estimator: str
        Estimator that approximates the training objective from observed data.
        Must be one of 'naive', 'ips-via-rec', 'ips-platform'.
    objective: str
        Whether the KPI is defined through the recommendation slot ('via-rec')
        or over the whole platform ('platform'). Must be 'via-rec' or 'platform'.
    train: SVMRankDataset
        (Original) training data.
    test: SVMRankDataset
        (Original) test data.
    batch_size: int, default=32
        Batch size.
    n_epochs: int, default=30
        Number of epochs.

    Returns
    -------
    List
        Test nDCG@10 measured after each epoch.
    """
    assert estimator in [
        "naive",
        "ips-via-rec",
        "ips-platform",
    ], f"estimator must be 'naive', 'ips-via-rec', 'ips-platform', but {estimator} is given"
    # Fixed: the failure message previously listed 'objective' instead of 'platform'.
    assert objective in [
        "via-rec",
        "platform",
    ], f"objective must be 'via-rec' or 'platform', but {objective} is given"

    ndcg_score_list = list()
    for _ in tqdm(range(n_epochs)):
        # A fresh DataLoader per epoch reshuffles the training data.
        loader = DataLoader(
            train,
            batch_size=batch_size,
            shuffle=True,
            collate_fn=train.collate_fn(),
        )
        score_fn.train()
        for batch in loader:
            conversion = convert_rel_to_mu(batch.relevance)[1]
            conversion_zero = convert_rel_to_mu_zero(batch.relevance)[1]
            click, pscore, recommend, pscore_zero = generate_click_and_recommend(
                batch.relevance)
            # Observed conversions: via the recommendation when clicked, plus
            # organic conversions for items that were not recommended.
            conversion_obs = conversion * click + conversion_zero * (1 - recommend)
            scores = score_fn(batch.features)
            if estimator == "naive":
                # No propensity correction at all.
                loss = listwise_loss(
                    scores=scores,
                    click=click,
                    conversion=conversion_obs,
                    num_docs=batch.n,
                )
            elif estimator == "ips-via-rec":
                # IPS correction only for the recommendation propensity.
                loss = listwise_loss(
                    scores=scores,
                    click=click,
                    conversion=conversion_obs,
                    num_docs=batch.n,
                    recommend=None,
                    pscore=pscore,
                    pscore_zero=None,
                )
            elif estimator == "ips-platform":
                # IPS correction for both recommended and organic outcomes.
                loss = listwise_loss(
                    scores=scores,
                    click=click,
                    conversion=conversion_obs,
                    num_docs=batch.n,
                    recommend=recommend,
                    pscore=pscore,
                    pscore_zero=pscore_zero,
                )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        score_fn.eval()
        ndcg_score = evaluate_test_performance(score_fn=score_fn, test=test, objective=objective)
        ndcg_score_list.append(ndcg_score)
    return ndcg_score_list
def train_ranker(
    score_fn: nn.Module,
    optimizer: optim,
    estimator: str,
    train: SVMRankDataset,
    test: SVMRankDataset,
    batch_size: int = 32,
    n_epochs: int = 30,
    pow_true: float = 1.0,
    pow_used: Optional[float] = None,
) -> List:
    """Train a ranking model and track its test nDCG@10 per epoch.

    Parameters
    ----------
    score_fn: nn.Module
        Scoring function to train.
    optimizer: optim
        Parameter-optimization algorithm.
    estimator: str
        Estimator that approximates the training objective from observed data.
        Must be 'naive', 'ips', or 'ideal'. With 'ideal', the model is trained
        directly on the true preference data (explicit feedback).
    train: SVMRankDataset
        (Original) training data.
    test: SVMRankDataset
        (Original) test data.
    batch_size: int, default=32
        Batch size.
    n_epochs: int, default=30
        Number of epochs.
    pow_true: float, default=1.0
        Controls the severity of position bias when generating clicks; larger
        values widen the gap between true preferences and observed clicks.
    pow_used: Optional[float], default=None
        Position-bias strength assumed during training. Defaults to pow_true;
        a different value simulates training under a misspecified bias model.
    """
    assert estimator in [
        "naive",
        "ips",
        "ideal",
    ], f"estimator must be 'naive', 'ips', or 'ideal', but {estimator} is given"
    if pow_used is None:
        pow_used = pow_true

    ndcg_score_list = list()
    for _ in tqdm(range(n_epochs)):
        # A fresh DataLoader per epoch reshuffles the training data.
        loader = DataLoader(
            train,
            batch_size=batch_size,
            shuffle=True,
            collate_fn=train.collate_fn(),
        )
        score_fn.train()
        for batch in loader:
            if estimator == "naive":
                # Train on raw clicks; position bias is ignored.
                click, theta = convert_gamma_to_implicit(
                    relevance=batch.relevance, pow_true=pow_true, pow_used=pow_used)
                loss = listwise_loss(
                    scores=score_fn(batch.features),
                    click=click,
                    num_docs=batch.n,
                )
            elif estimator == "ips":
                # Inverse-propensity weighting with examination probabilities.
                click, theta = convert_gamma_to_implicit(
                    relevance=batch.relevance, pow_true=pow_true, pow_used=pow_used)
                loss = listwise_loss(
                    scores=score_fn(batch.features),
                    click=click,
                    num_docs=batch.n,
                    pscore=theta,
                )
            elif estimator == "ideal":
                # Train directly on the true preferences (explicit feedback).
                gamma = convert_rel_to_gamma(relevance=batch.relevance)
                loss = listwise_loss(
                    scores=score_fn(batch.features),
                    click=gamma,
                    num_docs=batch.n,
                )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        score_fn.eval()
        ndcg_score = evaluate_test_performance(score_fn=score_fn, test=test)
        ndcg_score_list.append(ndcg_score)
    return ndcg_score_list