def step2_InferOutcome(self):
    print('Testing step 2: infer_outcome')
    l = self.foundDocs
    rankers = self.rankers
    # Perfect click model: relevant documents (label 1) are always clicked,
    # non-relevant documents never, and the user never stops early.
    user_model = CascadeUserModel("--p_click 0:.0, 1:1.0"
                                  " --p_stop 0:.0, 1:.0")
    query = self.query
    clicks = user_model.get_clicks(l, query.get_labels())
    creds = self.multil.infer_outcome(l, rankers, clicks, query)
    print("Clicks on list:", clicks)
    print("Ranks:", creds)
    assert len(creds) == len(self.rankers)
    # New asserts because ranks are returned, not creds: rank 1 must be
    # present, and out-of-range values (0 and len(creds) + 1) must not.
    assert 1 in creds
    assert 0 not in creds
    assert len(creds) + 1 not in creds
def __init__(self, feature_sets, n_features=64, cutoff=10,
             click_model="navigational", experiment_type=""):
    self.experiment_type = experiment_type
    self.n_rankers = len(feature_sets)
    self.n_features = n_features
    self.cutoff = cutoff
    self.train_queries = qu.load_queries(PATH_TRAIN_QUERIES, self.n_features)
    self.test_queries = qu.load_queries(PATH_TEST_QUERIES, self.n_features)
    # Sample-based probabilistic multileaving with increasing sample sizes.
    self.samplemultil100 = sbml.SampleBasedProbabilisticMultileaveAS(
        "--n_samples 100")
    self.samplemultil1000 = sbml.SampleBasedProbabilisticMultileaveAS(
        "--n_samples 1000")
    self.samplemultil10000 = sbml.SampleBasedProbabilisticMultileaveAS(
        "--n_samples 10000")
    self.samplemultil100000 = sbml.SampleBasedProbabilisticMultileaveAS(
        "--n_samples 100000")
    self.multil = ml.ProbabilisticMultileave()
    self.multil_nonbin = ml.ProbabilisticMultileave("-c True")
    self.interl = ProbabilisticInterleave('--aggregate binary')
    self.TeamDraftMultileave = TeamDraftMultileave()
    # One ranker per feature set: each ranker gets weight 1 on its own
    # feature ids and 0 everywhere else.
    self.allrankers = [
        rnk("1", "random", self.n_features) for _ in range(self.n_rankers)
    ]
    for feature_ids, ranker in zip(feature_sets, self.allrankers):
        weights = np.zeros(self.n_features)
        for fid in feature_ids:
            weights[fid] = 1
        ranker.update_weights(weights)

    if experiment_type == "sensitivity":
        # Ground-truth pairwise preferences follow from the rankers' mean
        # NDCG@cutoff on the test queries.
        ndcg = NdcgEval()
        average_ndcgs = np.zeros(self.n_rankers)
        for query in self.test_queries:
            for i, ranker in enumerate(self.allrankers):
                ranker.init_ranking(query)
                average_ndcgs[i] += ndcg.get_value(
                    ranker.get_ranking(), query.get_labels().tolist(),
                    None, self.cutoff)
        average_ndcgs /= len(self.test_queries)
        self.all_true_pref = np.zeros((self.n_rankers, self.n_rankers))
        for i in range(self.n_rankers):
            for j in range(self.n_rankers):
                self.all_true_pref[i, j] = \
                    0.5 * (average_ndcgs[i] - average_ndcgs[j]) + 0.5
    elif experiment_type == "bias":
        # Random clicks: no ranker should be preferred, so every true
        # pairwise preference is 0.5.
        click_model = "random"
        self.all_true_pref = np.full((self.n_rankers, self.n_rankers), 0.5)
    else:
        raise Exception("Set experiment type")

    if click_model == "navigational":
        click_str = "--p_click 0:.05, 1:0.95 --p_stop 0:.2, 1:.5"
    elif click_model == "perfect":
        click_str = "--p_click 0:.0, 1:1. --p_stop 0:.0, 1:.0"
    elif click_model == "informational":
        click_str = "--p_click 0:.4, 1:.9 --p_stop 0:.1, 1:.5"
    elif click_model == "random":
        click_str = "--p_click 0:.5, 1:.5 --p_stop 0:.0, 1:.0"
    self.user_model = CascadeUserModel(click_str)
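# Usage sketch (an assumption, not taken from this repository): if the
# constructor above belongs to a class named `Experiment`, and the
# PATH_TRAIN_QUERIES / PATH_TEST_QUERIES constants point at valid query
# files, a sensitivity experiment over three hypothetical two-feature
# rankers could be set up roughly like this:
#
#     feature_sets = [[0, 1], [2, 3], [4, 5]]  # hypothetical feature ids
#     experiment = Experiment(feature_sets,
#                             n_features=64,
#                             cutoff=10,
#                             click_model="navigational",
#                             experiment_type="sensitivity")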