Ejemplo n.º 1
0
    def setUp(self):
        """Prepare a ranker, a single parsed query, and that query's ranking.

        Sets ``self.ranker`` (a ``DRF`` instance), ``self.query`` (the entry
        for qid ``'373'``) and ``self.ranking`` (the documents returned by
        successive ``self.ranker.next()`` calls).
        """
        num_features = 5

        # Ranker under test; 'random' is passed both as the tie argument
        # and as the initial-weights setting.
        self.ranker = DRF(['3'],
                          'random',
                          num_features,
                          sample=None,
                          init='random')

        # Six documents for qid 373, five features each.
        raw_queries = """
        1 qid:373 1:0.089908 2:0.531250 3:0.500000 4:0.500000 5:0.156538
        0 qid:373 1:0.066055 2:0.171875 3:0.000000 4:0.250000 5:0.084715
        0 qid:373 1:0.148624 2:0.015625 3:0.250000 4:0.250000 5:0.151013
        0 qid:373 1:0.099083 2:0.250000 3:0.500000 4:0.750000 5:0.134438
        0 qid:373 1:0.051376 2:0.078125 3:0.250000 4:0.250000 5:0.060773
        0 qid:373 1:0.045872 2:1.000000 3:0.250000 4:0.250000 5:0.163904
        """
        stream = cStringIO.StringIO(raw_queries)
        parsed = query.Queries(stream, num_features)
        self.query = parsed['373']
        stream.close()

        # Keep the original ranking: initialise the ranker on the query and
        # pull one document per call until document_count() items are read.
        self.ranker.init_ranking(self.query)
        collected = []
        for _ in range(self.ranker.document_count()):
            collected.append(self.ranker.next())
        self.ranking = collected
Ejemplo n.º 2
0
    def test_create_ranking_vector(self):
        """Check ``utils.create_ranking_vector`` against precomputed values.

        Parses six documents for qid ``'373'``, ranks them by sorted docid,
        and asserts that the returned vector has one entry per feature and
        that each entry is almost equal to the hard-coded expectation.
        """
        feature_count = 5
        # Create queries to test with
        test_queries = """
            1 qid:373 1:0.080000 2:0.500000 3:0.500000 4:0.500000 5:0.160000
            0 qid:373 1:0.070000 2:0.180000 3:0.000000 4:0.250000 5:0.080000
            0 qid:373 1:0.150000 2:0.016000 3:0.250000 4:0.250000 5:0.150000
            0 qid:373 1:0.100000 2:0.250000 3:0.500000 4:0.750000 5:0.130000
            0 qid:373 1:0.050000 2:0.080000 3:0.250000 4:0.250000 5:0.060000
            0 qid:373 1:0.050000 2:1.000000 3:0.250000 4:0.250000 5:0.160000
        """
        hard_ranking_vector = [
            0.27938574, 1.11639191, 1.02610328, 1.29150486, 0.42166665
        ]
        query_fh = cStringIO.StringIO(test_queries)
        try:
            this_query = query.Queries(query_fh, feature_count)['373']
        finally:
            # Release the in-memory stream even when parsing fails.
            query_fh.close()

        fake_ranking = sorted(this_query.get_docids())
        ranking_vector = utils.create_ranking_vector(this_query, fake_ranking)

        # The length check comes first so the per-index comparison below
        # cannot run past the end of the returned vector.
        self.assertEqual(feature_count, len(ranking_vector))
        for j, expected in enumerate(hard_ranking_vector):
            self.assertAlmostEqual(ranking_vector[j], expected)
    def step1_ListCreation(self, n_rankers=3, credits=False):
        """Step 1: create a multileaved list and sanity-check its contents.

        Args:
            n_rankers: number of ranker instances to build and multileave.
            credits: when truthy, ``"-c True"`` is passed as the argument
                string to ``ml.ProbabilisticMultileave``; otherwise an empty
                string is passed.

        Side effects:
            Stores the created list, the rankers, the query and the
            multileave object on ``self`` (``foundDocs``, ``rankers``,
            ``query``, ``multil``) for use by the next step.
        """
        print('Testing step 1: creation of multileaved list')
        arg_str = "-c True" if credits else ""
        multil = ml.ProbabilisticMultileave(arg_str)

        stream = cStringIO.StringIO(self.test_queries)
        queries = qu.Queries(stream, self.test_num_features)

        # Take whichever query comes first in the key listing.
        first_key = queries.keys()[0]
        first_query = queries[first_key]
        stream.close()

        # second entry of the ranker arguments corresponds to ranker_type..
        ranker_arg_str = ['ranker.model.BM25', '1']
        ties = "random"
        feature_count = None
        rankers = []
        for _ in range(n_rankers):
            rankers.append(rnk(ranker_arg_str, ties, feature_count))

        list_length = 10
        createdList, _ = multil.multileave(rankers, first_query, list_length)

        foundDocs = [doc.docid for doc in createdList]
        existingDocs = [doc.docid for doc in first_query.get_docids()]
        # Every returned document must belong to the query...
        assert set(foundDocs).issubset(set(existingDocs))
        # ...the list must have exactly the requested length...
        assert len(foundDocs) == list_length
        # ...and no document may appear twice.
        assert len(foundDocs) == len(set(foundDocs))

        # For next step:
        self.foundDocs = createdList
        self.rankers = rankers
        self.query = first_query
        self.multil = multil
Ejemplo n.º 4
0
    def test_queries(self):
        """Verify the accessors of the query stored under key ``'1'``.

        Parses ``self.test_queries`` (presumably an instance attribute set
        by the fixture -- confirm) and checks document count, feature
        vectors, docids, labels, predictions and comments.
        """
        stream = cStringIO.StringIO(self.test_queries)
        parsed = qu.Queries(stream, self.test_num_features)
        first_query = parsed['1']
        stream.close()

        self.assertEqual(4, first_query.get_document_count())

        feature_vectors = first_query.get_feature_vectors()
        self.assertEqual(4, len(feature_vectors))

        docids = [doc.docid for doc in first_query.get_docids()]
        self.assertEqual([0, 1, 2, 3], docids)

        # TODO: do "labels" have to be np array? not a list?
        labels = first_query.get_labels().tolist()
        self.assertEqual([4, 1, 0, 0], labels)
        # TODO: FIX and re-enable:
        #     self.assertEqual(1, query.get_label(1))

        # Neither predictions nor comments are expected for this query.
        self.assertEqual(None, first_query.get_predictions())
        self.assertEqual(None, first_query.get_comments())
        self.assertEqual(None, first_query.get_comment(0))
Ejemplo n.º 5
0
 def setUp(self):
     """Parse a four-document query and build the pairwise learner.

     Sets ``self.test_num_features``, ``self.query_fh``, ``self.queries``,
     ``self.query`` (entry for qid ``'1'``) and ``self.learner``.
     """
     self.test_num_features = 6

     # Query fixture: four documents for qid 1, six features each.
     raw_query = """
     4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
     1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
     0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
     0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
     """
     self.query_fh = cStringIO.StringIO(raw_query)
     self.queries = query.Queries(self.query_fh, self.test_num_features)
     self.query = self.queries['1']

     # Pairwise learner, configured through a command-line style string.
     learner_args = (
         "--init_weights 0,0,1,0,0,0 --epsilon 0.0 --eta 0.001 --ranker "
         "ranker.DeterministicRankingFunction --ranker_tie first")
     self.learner = PairwiseLearningSystem(self.test_num_features,
                                           learner_args)
Ejemplo n.º 6
0
 def setUp(self):
     """Parse a four-document query and build the listwise learner.

     Sets ``self.test_num_features``, ``self.query_fh``, ``self.queries``,
     ``self.query`` (entry for qid ``'1'``) and ``self.learner``.
     """
     self.test_num_features = 6

     # Query fixture: four documents for qid 1, six features each.
     raw_query = """
     4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
     1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
     0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
     0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
     """
     self.query_fh = cStringIO.StringIO(raw_query)
     self.queries = query.Queries(self.query_fh, self.test_num_features)
     self.query = self.queries['1']

     # Listwise learner: one option per entry, joined by single spaces
     # into the command-line style argument string.
     learner_args = " ".join([
         "--init_weights 0,0,1,0,0,0",
         "--delta 1.0",
         "--alpha 0.01",
         "--ranker ranker.ProbabilisticRankingFunction",
         "--ranker_args 3",
         "--ranker_tie first",
         "--comparison comparison.ProbabilisticInterleaveWithHistory",
         '--comparison_args "--history_length 10 --biased true"',
     ])
     self.learner = ListwiseLearningSystem(self.test_num_features,
                                           learner_args)
    def setUp(self):
        """Parse a four-document query and prepare two weight vectors.

        Sets ``self.test_num_features``, ``self.query_fh``, ``self.queries``,
        ``self.query`` (entry for qid ``'1'``), ``self.zero_weights`` (six
        zeros) and ``self.weights`` (1.0 at index 2, zeros elsewhere).
        """
        self.test_num_features = 6
        raw_query = """
        4 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # highly relevant
        1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # bad
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
        """

        self.query_fh = cStringIO.StringIO(raw_query)
        self.queries = query.Queries(self.query_fh, self.test_num_features)
        self.query = self.queries['1']

        # Weight vectors are written as space-separated text and converted
        # to float arrays.
        zero_values = list(map(float, "0 0 0 0 0 0".split()))
        self.zero_weights = np.asarray(zero_values)

        weight_values = list(map(float, "0 0 1 0 0 0".split()))
        self.weights = np.asarray(weight_values)
Ejemplo n.º 8
0
    def setUp(self):
        """Parse a four-document query and prepare three weight vectors.

        Sets ``self.test_num_features``, ``self.query_fh``, ``self.queries``,
        ``self.query`` (entry for qid ``'1'``), ``self.zero_weights``,
        ``self.weights_1`` and ``self.weights_2``.
        """
        self.test_num_features = 6
        raw_query = """
        1 qid:1 1:2.6 2:1 3:2.1 4:0 5:2 6:1.4 # relevant
        1 qid:1 1:1.2 2:1 3:2.9 4:0 5:2 6:1.9 # relevant
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2 6:5.6 # not relevant
        0 qid:1 1:0.5 2:1 3:2.3 4:0 5:2.1 6:5.6 # not relevant
        """

        self.query_fh = cStringIO.StringIO(raw_query)
        self.queries = qu.Queries(self.query_fh, self.test_num_features)
        self.query = self.queries['1']

        # Weight vectors are written as space-separated text and converted
        # to float arrays.
        zero_values = list(map(float, "0 0 0 0 0 0".split()))
        self.zero_weights = np.asarray(zero_values)

        # results in ranking: 1, 3, 2, 0 (original author's note)
        first_values = list(map(float, "0 0 1 0 1 0".split()))
        self.weights_1 = np.asarray(first_values)

        second_values = list(map(float, "1 0 0 0 1 0".split()))
        self.weights_2 = np.asarray(second_values)
Ejemplo n.º 9
0
    def test_create_ranking_vector(self):
        """Compare ``utils.create_ranking_vector`` output to fixed values.

        The six documents of qid ``'373'`` are ranked by sorted docid; the
        resulting vector must have one entry per feature, each almost equal
        to the corresponding hard-coded expectation.
        """
        num_features = 5
        # Query fixture: six documents, five features each.
        raw_queries = """
            1 qid:373 1:0.080000 2:0.500000 3:0.500000 4:0.500000 5:0.160000
            0 qid:373 1:0.070000 2:0.180000 3:0.000000 4:0.250000 5:0.080000
            0 qid:373 1:0.150000 2:0.016000 3:0.250000 4:0.250000 5:0.150000
            0 qid:373 1:0.100000 2:0.250000 3:0.500000 4:0.750000 5:0.130000
            0 qid:373 1:0.050000 2:0.080000 3:0.250000 4:0.250000 5:0.060000
            0 qid:373 1:0.050000 2:1.000000 3:0.250000 4:0.250000 5:0.160000
        """
        expected_vector = [
            0.27938574, 1.11639191, 1.02610328, 1.29150486, 0.42166665
        ]

        stream = cStringIO.StringIO(raw_queries)
        parsed = query.Queries(stream, num_features)
        target_query = parsed['373']
        stream.close()

        docid_ranking = sorted(target_query.get_docids())
        actual_vector = utils.create_ranking_vector(target_query,
                                                    docid_ranking)
        self.assertEqual(num_features, len(actual_vector))

        # One expected value per feature, compared index by index.
        for index, expected in enumerate(expected_vector):
            self.assertAlmostEqual(actual_vector[index], expected)
Ejemplo n.º 10
0
 def test_queries(self):
     """Check that parsing ``self.test_queries`` yields exactly one query."""
     stream = cStringIO.StringIO(self.test_queries)
     parsed = qu.Queries(stream, self.test_num_features)
     stream.close()
     query_total = parsed.get_size()
     self.assertEqual(1, query_total)