Example 1
    def test_searches(self, in_filename, out_filename, poolsize=10):
        """
        Takes a file where each line gives a search and a list of
        expected URLs.
        """
        start_time = time.time()
        pool = eventlet.GreenPool(size=poolsize)

        inf = open(in_filename, 'r')
        r = csv.reader(inf, dialect='excel-tab')
        def doeeet(row):
            # Decode the TSV row: the first column is the query, the rest
            # are the expected URLs in ranked order.
            row = [x.decode('utf-8').strip() for x in row]
            query = row[0]
            expected = row[1:]
            try:
                return (query, self.test_search(query, expected))
            except Exception:
                print >>sys.stderr, "Caught exception while testing", query.encode('utf-8')
                return (query, [None] * len(expected))
        print "Running searches..."
        print
        results = pool.imap(doeeet, r)
        with open(out_filename, 'w') as outf:
            w = csv.writer(outf, dialect='excel-tab')
            scores = []
            for count, result in enumerate(results):
                score = calculate_score(result[1])
                scores.append(score)
                row = [ result[0], score ] # the query and score
                row.extend(result[1]) # and the positions
                w.writerow(map(lambda x: unicode(x).encode('utf-8') if x is not None else "", row))
                if len(result[0]) > 69:
                    truncated_name = result[0][:69] + u"…"
                else:
                    truncated_name = result[0]
                print u"  %-70.70s %.2f" % (truncated_name, score)
                # Every tenth query, print a running average of the scores so far.
                if count % 10 == 9:
                    print
                    print "Average score", float(sum(scores)) / len(scores)
                    print
        inf.close()
        print
        print "Summary"
        print "-------"
        print
        run_time = time.time() - start_time
        print "Ran %i queries in %.2f seconds, average score %f." % (len(scores), run_time, float(sum(scores)) / len(scores))
        print
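The input file that test_searches() reads is tab-separated: column 0 is the query and the remaining columns are the expected URLs, best match first. Below is a minimal sketch (Python 2) of building such a file and calling the method; SearchTester is a hypothetical name for whichever class defines test_searches() and test_search(), which this snippet does not show.

import csv

# Each row: the query followed by the expected URLs, best match first.
rows = [
    ["first query", "http://example.com/a", "http://example.com/b"],
    ["second query", "http://example.com/c"],
]
with open("searches.tsv", "wb") as f:
    w = csv.writer(f, dialect='excel-tab')
    w.writerows(rows)

tester = SearchTester()  # hypothetical: the class that defines test_searches()
tester.test_searches("searches.tsv", "results.tsv", poolsize=5)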
Example 2
def scoretest():
    from searchtester.scoring import calculate_score
    import csv
    from optparse import OptionParser
    
    parser = OptionParser(
        usage="usage: %prog [options] resultsfile",
        description="Given a TSV file of searches and 0-indexed positions the best matches were found at, print an 'accuracy' score for each query. Prints a final summary including the average accuracy score.",
    )
    
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Must provide a results file, from a previous searchtest run .")
    with open(args[0]) as f:
        print "Calculating scores..."
        print
        r = csv.reader(f, dialect='excel-tab')
        scores = []
        for count, row in enumerate(r):
            score = calculate_score(row[2:])
            scores.append(score)
            if len(row[0]) > 69:
                truncated_name = row[0][:69] + u"…"
            else:
                truncated_name = row[0]
            print u"  %-70.70s %.2f" % (truncated_name, score)
            # Every tenth query, print a running average of the scores so far.
            if count % 10 == 9:
                print
                print "Average score", float(sum(scores)) / len(scores)
                print
    print
    print "Summary"
    print "-------"
    print
    print "%i queries, average score %f." % (len(scores), float(sum(scores)) / len(scores))
    print

    return 0
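The results file that scoretest() consumes has the same layout as what test_searches() above writes: column 0 is the query, column 1 the score recorded at search time, and columns 2 onward the 0-indexed positions, left blank where an expected URL was never found. A small made-up example file, written with the same dialect:

import csv

# Made-up data only, to show the column layout scoretest() expects.
with open("results.tsv", "wb") as f:
    w = csv.writer(f, dialect='excel-tab')
    w.writerow(["first query", "1.0", "0", "1"])   # both URLs found, in order
    w.writerow(["second query", "0.0", ""])        # expected URL never found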
Example 3
    def test_middling(self):
        self.assertRoughlyEqual(0.9926, calculate_score([0, 1, 2, 3, 5]))
        self.assertRoughlyEqual(0.9800, calculate_score([0, 1, 2, 4, 3]))
        self.assertRoughlyEqual(0.8520, calculate_score([0, 2, 1, 3, 4]))

        self.assertRoughlyEqual(0.7976, calculate_score([0, 3, 1, 2, 4]))
        self.assertRoughlyEqual(0.7775, calculate_score([0, 4, 1, 2, 3]))
        self.assertRoughlyEqual(0.7775, calculate_score([0, 3, 1, 4, 2]))

        self.assertRoughlyEqual(0.5977, calculate_score([1, 0, 2, 3, 4]))
        self.assertRoughlyEqual(0.5032, calculate_score([1, 0, 4, 3, 2]))
        self.assertRoughlyEqual(0.4497, calculate_score([2, 0, 1, 3, 4]))

        self.assertRoughlyEqual(0.4432, calculate_score([1, 2, 0, 3]))
        self.assertRoughlyEqual(0.3881, calculate_score([1, 3, 0, 2]))
        self.assertRoughlyEqual(0.2935, calculate_score([2, 1, 0, 3]))

        self.assertRoughlyEqual(0.2273, calculate_score([2, 1, 3, 4, 0]))
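assertRoughlyEqual is not a standard unittest assertion, so the test case presumably defines it itself. A minimal sketch of such a helper, assuming a tolerance of four decimal places to match the expected values above:

import unittest

class RoughlyEqualMixin(object):
    # Hypothetical helper; the real project may implement this differently.
    def assertRoughlyEqual(self, expected, actual, places=4):
        # assertAlmostEqual passes when the difference rounds to zero at
        # the given number of decimal places.
        self.assertAlmostEqual(expected, actual, places=places)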
Example 4
    def test_awful(self):
        self.assertEqual(0.0, calculate_score([None]))
        self.assertEqual(0.0, calculate_score([None, None]))
        self.assertEqual(0.0, calculate_score([None, None, None]))
Example 5
    def test_perfect(self):
        self.assertEqual(1.0, calculate_score([0]))
        self.assertEqual(1.0, calculate_score([0, 1]))
        self.assertEqual(1.0, calculate_score([0, 1, 2]))
        self.assertEqual(1.0, calculate_score([0, 1, 2, 3]))
        self.assertEqual(1.0, calculate_score([0, 1, 2, 3, 4]))
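Taken together, these tests pin down the interface of calculate_score: it takes one entry per expected URL, each being the 0-indexed position at which that URL was found (or None if it never appeared), and returns 1.0 when the results come back in the expected order, 0.0 when nothing was found at all, and values in between otherwise. The sketch below is an illustrative stand-in only, not the project's implementation: it satisfies the perfect and awful cases above but will not reproduce the exact values asserted in test_middling.

def calculate_score(positions):
    # Illustrative stand-in, NOT the project's calculate_score.
    if not positions:
        return 0.0
    total = 0.0
    for wanted, found in enumerate(positions):
        if found is None:
            continue  # this expected URL never appeared in the results
        # Penalise URLs that appear further from their expected rank.
        total += 1.0 / (1 + abs(found - wanted))
    return total / len(positions)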