def setUp(self):
    """Create the ranker under test, one query, and its initial ranking.

    After this runs, ``self.ranker`` holds the DRF instance, ``self.query``
    the parsed query with id ``'373'`` and ``self.ranking`` every document
    the ranker returned for that query, in the order it returned them.
    """
    num_features = 5

    # Ranker with randomly initialised weights and random tie breaking.
    self.ranker = DRF(['3'], 'random', num_features, sample=None,
                      init='random')

    # Six documents for query 373: label, query id, five feature values.
    raw_documents = """
    1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
    0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
    0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
    0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
    0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
    0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
    """
    stream = cStringIO.StringIO(raw_documents)
    self.query = query.Queries(stream, num_features)['373']
    stream.close()

    # Save the original ranking by draining the ranker one document at a
    # time.
    self.ranker.init_ranking(self.query)
    original_order = []
    for _ in range(self.ranker.document_count()):
        original_order.append(self.ranker.next())
    self.ranking = original_order
def test_create_ranking_vector(self):
    """Compare ``utils.create_ranking_vector`` with precomputed values.

    A six-document query is parsed, its document ids are sorted to form a
    fixed ranking, and the resulting vector must have one entry per
    feature, each matching ``hard_ranking_vector`` under
    ``assertAlmostEqual``.
    """
    feature_count = 5

    # Create queries to test with
    test_queries = """
    1 qid:373 1:0.080000 2:0.500000 3:0.500000 4:0.500000 5:0.160000
    0 qid:373 1:0.070000 2:0.180000 3:0.000000 4:0.250000 5:0.080000
    0 qid:373 1:0.150000 2:0.016000 3:0.250000 4:0.250000 5:0.150000
    0 qid:373 1:0.100000 2:0.250000 3:0.500000 4:0.750000 5:0.130000
    0 qid:373 1:0.050000 2:0.080000 3:0.250000 4:0.250000 5:0.060000
    0 qid:373 1:0.050000 2:1.000000 3:0.250000 4:0.250000 5:0.160000
    """
    # Expected output, one value per feature.
    hard_ranking_vector = [
        0.27938574,
        1.11639191,
        1.02610328,
        1.29150486,
        0.42166665,
    ]

    query_fh = cStringIO.StringIO(test_queries)
    try:
        this_query = query.Queries(query_fh, feature_count)['373']
    finally:
        # Close the buffer even when parsing raises.
        query_fh.close()

    # Sorting the document ids gives a ranking that does not depend on any
    # ranker.
    fake_ranking = sorted(this_query.get_docids())
    ranking_vector = utils.create_ranking_vector(this_query, fake_ranking)

    self.assertEqual(feature_count, len(ranking_vector))
    for j, expected in enumerate(hard_ranking_vector):
        self.assertAlmostEqual(ranking_vector[j], expected)
def step1_ListCreation(self, n_rankers=3, credits=False):
    """Step 1: create a multileaved list and sanity-check it.

    Builds ``n_rankers`` rankers, multileaves them over the first query
    parsed from ``self.test_queries`` and asserts that the result contains
    only documents of that query, has the requested length and holds no
    duplicates.  The list, rankers, query and multileaver are stored on
    ``self`` for the following steps.

    Args:
        n_rankers: number of rankers to multileave.
        credits: when true, the multileaver is built with ``"-c True"``;
            otherwise with an empty argument string.
    """
    print('Testing step 1: creation of multileaved list')

    multileaver_args = "-c True" if credits else ""
    multileaver = ml.ProbabilisticMultileave(multileaver_args)

    # Take whichever query comes first in the parsed set.
    stream = cStringIO.StringIO(self.test_queries)
    parsed = qu.Queries(stream, self.test_num_features)
    first_qid = parsed.keys()[0]
    current_query = parsed[first_qid]
    stream.close()

    # Second entry corresponds to ranker_type.
    ranker_spec = ['ranker.model.BM25', '1']
    tie_breaking = "random"
    num_features = None
    ranker_pool = []
    for _ in range(n_rankers):
        ranker_pool.append(rnk(ranker_spec, tie_breaking, num_features))

    wanted_length = 10
    multileaved, _ignored = multileaver.multileave(
        ranker_pool, current_query, wanted_length)

    shown_ids = [doc.docid for doc in multileaved]
    known_ids = [doc.docid for doc in current_query.get_docids()]
    unique_shown = set(shown_ids)

    assert unique_shown <= set(known_ids)
    assert len(shown_ids) == wanted_length
    assert len(shown_ids) == len(unique_shown)  # No duplicates

    # For next step:
    self.foundDocs = multileaved
    self.rankers = ranker_pool
    self.query = current_query
    self.multil = multileaver
def test_queries(self):
    """Parse ``self.test_queries`` and verify the contents of query '1'."""
    stream = cStringIO.StringIO(self.test_queries)
    parsed = qu.Queries(stream, self.test_num_features)
    first_query = parsed['1']
    stream.close()

    expected_documents = 4
    self.assertEqual(expected_documents, first_query.get_document_count())
    self.assertEqual(expected_documents,
                     len(first_query.get_feature_vectors()))

    observed_ids = [doc.docid for doc in first_query.get_docids()]
    self.assertEqual([0, 1, 2, 3], observed_ids)

    # TODO: do "labels" have to be np array? not a list?
    observed_labels = first_query.get_labels().tolist()
    self.assertEqual([4, 1, 0, 0], observed_labels)
    # TODO: FIX -- disabled check:
    # self.assertEqual(1, query.get_label(1))

    # No predictions or comments are expected for this query.
    self.assertEqual(None, first_query.get_predictions())
    self.assertEqual(None, first_query.get_comments())
    self.assertEqual(None, first_query.get_comment(0))
def setUp(self):
    """Prepare a four-document query and a pairwise learning system.

    Sets ``self.test_num_features``, ``self.query_fh`` (left open),
    ``self.queries``, ``self.query`` (query id ``'1'``) and
    ``self.learner``.
    """
    # Query fixture: label, query id, six features and a trailing comment
    # per document.
    self.test_num_features = 6
    documents = """
    4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
    1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    """
    self.query_fh = cStringIO.StringIO(documents)
    self.queries = query.Queries(self.query_fh, self.test_num_features)
    self.query = self.queries['1']

    # Pairwise learner, configured through a single option string.
    learner_options = " ".join([
        "--init_weights 0,0,1,0,0,0",
        "--epsilon 0.0",
        "--eta 0.001",
        "--ranker ranker.DeterministicRankingFunction",
        "--ranker_tie first",
    ])
    self.learner = PairwiseLearningSystem(self.test_num_features,
                                          learner_options)
def setUp(self):
    """Prepare a four-document query and a listwise learning system.

    Sets ``self.test_num_features``, ``self.query_fh`` (left open),
    ``self.queries``, ``self.query`` (query id ``'1'``) and
    ``self.learner``.
    """
    # Query fixture: label, query id, six features and a trailing comment
    # per document.
    self.test_num_features = 6
    documents = """
    4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
    1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    """
    self.query_fh = cStringIO.StringIO(documents)
    self.queries = query.Queries(self.query_fh, self.test_num_features)
    self.query = self.queries['1']

    # Listwise learner; the comparison arguments are passed as one quoted
    # value inside the option string.
    learner_options = " ".join([
        "--init_weights 0,0,1,0,0,0",
        "--delta 1.0",
        "--alpha 0.01",
        "--ranker ranker.ProbabilisticRankingFunction",
        "--ranker_args 3",
        "--ranker_tie first",
        "--comparison comparison.ProbabilisticInterleaveWithHistory",
        '--comparison_args "--history_length 10 --biased true"',
    ])
    self.learner = ListwiseLearningSystem(self.test_num_features,
                                          learner_options)
def setUp(self):
    """Prepare a four-document query and two weight vectors.

    Sets ``self.test_num_features``, ``self.query_fh`` (left open),
    ``self.queries``, ``self.query`` (query id ``'1'``),
    ``self.zero_weights`` (all zeros) and ``self.weights`` (1.0 in the
    third position, zeros elsewhere).
    """
    def as_weights(text):
        # Turn a whitespace-separated string of numbers into a float array.
        return np.asarray([float(token) for token in text.split()])

    self.test_num_features = 6
    documents = """
    4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
    1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    """
    self.query_fh = cStringIO.StringIO(documents)
    self.queries = query.Queries(self.query_fh, self.test_num_features)
    self.query = self.queries['1']

    self.zero_weights = as_weights("0 0 0 0 0 0")
    self.weights = as_weights("0 0 1 0 0 0")
def setUp(self):
    """Prepare a four-document query and three weight vectors.

    Sets ``self.test_num_features``, ``self.query_fh`` (left open),
    ``self.queries``, ``self.query`` (query id ``'1'``),
    ``self.zero_weights``, ``self.weights_1`` and ``self.weights_2``.
    """
    def as_weights(text):
        # Turn a whitespace-separated string of numbers into a float array.
        return np.asarray([float(token) for token in text.split()])

    self.test_num_features = 6
    # The last two documents differ only in feature 5 (2 versus 2.1).
    documents = """
    1 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # relevant
    1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # relevant
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
    0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2.1 6:5.6 # not relevant
    """
    self.query_fh = cStringIO.StringIO(documents)
    self.queries = qu.Queries(self.query_fh, self.test_num_features)
    self.query = self.queries['1']

    self.zero_weights = as_weights("0 0 0 0 0 0")
    # results in ranking: 1, 3, 2, 0
    # (feature 3 + feature 5 per document: 4.1, 4.9, 4.3, 4.4)
    self.weights_1 = as_weights("0 0 1 0 1 0")
    self.weights_2 = as_weights("1 0 0 0 1 0")
def test_create_ranking_vector(self):
    """Compare ``utils.create_ranking_vector`` with precomputed values.

    The document ids of a six-document query are sorted to form a fixed
    ranking; the resulting vector must have one entry per feature and
    match the stored expectation under ``assertAlmostEqual``.
    """
    num_features = 5

    # Six documents for query 373 with five feature values each.
    raw_documents = """
    1 qid:373 1:0.080000 2:0.500000 3:0.500000 4:0.500000 5:0.160000
    0 qid:373 1:0.070000 2:0.180000 3:0.000000 4:0.250000 5:0.080000
    0 qid:373 1:0.150000 2:0.016000 3:0.250000 4:0.250000 5:0.150000
    0 qid:373 1:0.100000 2:0.250000 3:0.500000 4:0.750000 5:0.130000
    0 qid:373 1:0.050000 2:0.080000 3:0.250000 4:0.250000 5:0.060000
    0 qid:373 1:0.050000 2:1.000000 3:0.250000 4:0.250000 5:0.160000
    """
    # Expected output, one value per feature.
    expected_vector = [
        0.27938574,
        1.11639191,
        1.02610328,
        1.29150486,
        0.42166665,
    ]

    stream = cStringIO.StringIO(raw_documents)
    parsed_query = query.Queries(stream, num_features)['373']
    stream.close()

    ordered_docids = sorted(parsed_query.get_docids())
    actual_vector = utils.create_ranking_vector(parsed_query,
                                                ordered_docids)

    self.assertEqual(num_features, len(actual_vector))
    position = 0
    while position < num_features:
        self.assertAlmostEqual(actual_vector[position],
                               expected_vector[position])
        position += 1
def test_queries(self):
    """Parse ``self.test_queries`` and check that it holds one query."""
    stream = cStringIO.StringIO(self.test_queries)
    parsed = qu.Queries(stream, self.test_num_features)
    stream.close()

    expected_size = 1
    self.assertEqual(expected_size, parsed.get_size())