def __init__(self, feature_count, arg_str):
    """
    Initialize the base listwise learner and parse the options that
    control candidate pre-selection from historical data.

    @param feature_count: the number of features
    @param arg_str: option string, e.g.
        "-l HISTORY_LENGTH -e NUM_CANDIDATES -s SELECT_CANDIDATE".
        -e, -l and -s are required; -b (default "False") and
        -r (default 1) are optional. Options not declared here are
        ignored by this parser; the same string is also handed to
        ListwiseLearningSystem.__init__.
    @raise AttributeError: if the value given for -s does not name an
        attribute of this object.
    """
    ListwiseLearningSystem.__init__(self, feature_count, arg_str)

    parser = argparse.ArgumentParser(prog=self.__class__.__name__)
    parser.add_argument("-e", "--num_candidates", required=True, type=int,
        help="Number of candidate rankers to explore in each round.")
    parser.add_argument("-l", "--history_length", required=True, type=int,
        help="Number of historic data points to take into account when "
        "pre-selecting candidates.")
    parser.add_argument("-s", "--select_candidate", required=True,
        help="Method for selecting a candidate ranker from a ranker pool."
        " Options: select_candidate_random, select_candidate_simple,"
        " select_candidate_repeated, or own implementation.")
    # The two help texts below previously lacked the space between the
    # concatenated literals ("not useimportance", "evaluation(when").
    parser.add_argument("-b", "--biased", default="False",
        help="Set to true if comparison should be biased (i.e., not use "
        "importance sampling).")
    parser.add_argument("-r", "--num_repetitions", type=int, default=1,
        help="The number of repetitions for each ranker pair evaluation "
        "(when the selection method is select_candidate_repeated).")

    # parse_known_args returns (namespace, leftover_args); only the
    # namespace is needed, leftovers are deliberately discarded.
    args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])

    self.num_candidates = args["num_candidates"]
    # Resolve the selection strategy by name to an attribute of self.
    self.select_candidate = getattr(self, args["select_candidate"])
    self.history_length = args["history_length"]
    self.biased = string_to_boolean(args["biased"])
    logging.info("Initialized historical data usage to: %r", self.biased)
    self.num_repetitions = args["num_repetitions"]
    # Starts empty; filled elsewhere as interactions are observed.
    self.history = []
def __init__(self, feature_count, arg_str):
    """
    Construct the learner: run the ListwiseLearningSystem initializer,
    then read this class's own command-line style options.

    @param feature_count: the number of features
    @param arg_str: option string of the form
        "-l HISTORY_LENGTH -e NUM_CANDIDATES -s SELECT_CANDIDATE"
        with optional "-b BIASED" (default "False") and
        "-r NUM_REPETITIONS" (default 1). Unrecognized options are
        tolerated here because parse_known_args is used.
    @raise AttributeError: if SELECT_CANDIDATE is not the name of an
        attribute on this instance.
    """
    ListwiseLearningSystem.__init__(self, feature_count, arg_str)

    parser = argparse.ArgumentParser(prog=self.__class__.__name__)
    parser.add_argument(
        "-e", "--num_candidates", required=True, type=int,
        help="Number of candidate rankers to explore in each round.")
    parser.add_argument(
        "-l", "--history_length", required=True, type=int,
        help="Number of historic data points to take into account when "
             "pre-selecting candidates.")
    parser.add_argument(
        "-s", "--select_candidate", required=True,
        help="Method for selecting a candidate ranker from a ranker pool."
             " Options: select_candidate_random, select_candidate_simple,"
             " select_candidate_repeated, or own implementation.")
    # Fixed: a separating space was missing where the literals join.
    parser.add_argument(
        "-b", "--biased", default="False",
        help="Set to true if comparison should be biased (i.e., not use "
             "importance sampling).")
    # Fixed: a separating space was missing where the literals join.
    parser.add_argument(
        "-r", "--num_repetitions", type=int, default=1,
        help="The number of repetitions for each ranker pair evaluation "
             "(when the selection method is select_candidate_repeated).")

    # Keep the parsed namespace, drop the list of unrecognized arguments.
    known_args, _unrecognized = parser.parse_known_args(
        split_arg_str(arg_str))
    options = vars(known_args)

    self.num_candidates = options["num_candidates"]
    # The option value is a method/attribute name looked up on self.
    self.select_candidate = getattr(self, options["select_candidate"])
    self.history_length = options["history_length"]
    self.biased = string_to_boolean(options["biased"])
    logging.info("Initialized historical data usage to: %r", self.biased)
    self.num_repetitions = options["num_repetitions"]
    self.history = []
def setUp(self):
    """
    Build the fixture shared by the tests: one query (qid 1) with four
    documents of six features each, and a ListwiseLearningSystem
    configured with fixed initial weights.
    """
    # initialize query
    self.test_num_features = 6
    # One document per line: "<label> qid:<id> <feature>:<value>... # note".
    # The records must stay newline-separated; on a single line everything
    # after the first '#' would read as a trailing comment.
    test_query = """
        4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
        1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
        """
    self.query_fh = cStringIO.StringIO(test_query)
    self.queries = query.Queries(self.query_fh, self.test_num_features)
    self.query = self.queries['1']

    # initialize listwise learner
    learner_args = " ".join([
        "--init_weights 0,0,1,0,0,0",
        "--delta 1.0",
        "--alpha 0.01",
        "--ranker ranker.ProbabilisticRankingFunction",
        "--ranker_args 3",
        "--ranker_tie first",
        "--comparison comparison.ProbabilisticInterleaveWithHistory",
        "--comparison_args \"--history_length 10 --biased true\"",
    ])
    self.learner = ListwiseLearningSystem(self.test_num_features,
        learner_args)
class TestListwiseLearning(unittest.TestCase):
    """Tests for ListwiseLearningSystem using a small in-memory query."""

    def setUp(self):
        """
        Create a single test query (qid 1, four documents, six features)
        and the learner instance exercised by the tests.
        """
        # initialize query
        self.test_num_features = 6
        # Line-oriented fixture, one document per line with a trailing
        # '#' comment. Keeping each record on its own line matters: when
        # joined on one line, all text after the first '#' would be
        # taken as comment.
        test_query = """
            4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
            1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
            0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
            0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
            """
        self.query_fh = cStringIO.StringIO(test_query)
        self.queries = query.Queries(self.query_fh, self.test_num_features)
        self.query = self.queries['1']

        # initialize listwise learner
        self.learner = ListwiseLearningSystem(
            self.test_num_features,
            "--init_weights 0,0,1,0,0,0"
            " --delta 1.0"
            " --alpha 0.01"
            " --ranker ranker.ProbabilisticRankingFunction"
            " --ranker_args 3"
            " --ranker_tie first"
            " --comparison comparison.ProbabilisticInterleaveWithHistory"
            " --comparison_args \"--history_length 10 --biased true\"")

    def testRanker(self):
        # Smoke test: producing a ranked list for the fixture query must
        # not raise. The result itself is not inspected.
        self.learner.get_ranked_list(self.query)
def _update_solution(self, outcome, clicks):
    """
    Record the current interaction in the bounded history (when history
    is enabled), then delegate the update to ListwiseLearningSystem.

    @param outcome: passed through unchanged to the inherited update
    @param clicks: stored with the history item and passed through to
        the inherited update
    @return: whatever ListwiseLearningSystem._update_solution returns
    """
    history_enabled = self.history_length > 0
    if history_enabled:
        # The history is capped at history_length entries: evict the
        # oldest one before adding a new one once the cap is reached.
        if len(self.history) == self.history_length:
            del self.history[0]

        shown_list = self.current_l
        context = self.current_context
        current_query = self.current_query

        entry = HistoryItem(shown_list, context, clicks, current_query)
        # Cache the probability of the observed list under the source
        # distribution on the item so it is only computed once.
        entry.p_list_source = self.comparison.get_probability_of_list(
            shown_list, context, current_query)
        self.history.append(entry)

    # The actual solution update is the inherited one.
    return ListwiseLearningSystem._update_solution(self, outcome, clicks)
def _update_solution(self, outcome, clicks):
    """
    Append the just-observed list, context, clicks and query to
    self.history (if history_length > 0) and then run the inherited
    ListwiseLearningSystem update.

    @param outcome: forwarded to the inherited update
    @param clicks: saved in the new history item and forwarded to the
        inherited update
    @return: the result of ListwiseLearningSystem._update_solution
    """
    max_items = self.history_length
    if max_items > 0:
        past = self.history
        # Make room first: drop the oldest item when the history is full.
        if len(past) == max_items:
            past.pop(0)

        item = HistoryItem(
            self.current_l,
            self.current_context,
            clicks,
            self.current_query,
        )
        # Probability of the observed list under the source distribution,
        # stored on the item so that it is computed a single time.
        probability = self.comparison.get_probability_of_list(
            self.current_l,
            self.current_context,
            self.current_query,
        )
        item.p_list_source = probability
        past.append(item)

    # Delegate to the parent class for the actual update step.
    updated = ListwiseLearningSystem._update_solution(self, outcome, clicks)
    return updated