def possible_disambiguation_pages_simple(self, title):
    """
    Find candidate disambiguation pages using only MySQL full-text indexing.

    The normalized title is matched against ``Page.spaced_title`` twice in
    the same query: in boolean mode with every word marked as required, to
    select the rows, and in natural-language mode, to compute the score the
    rows are ordered by.

    :param title: string
    :return: list of lower-cased page names, best score first (at most 15)
    """
    def quote_literal(text):
        # Escape for a double-quoted MySQL string literal so a title that
        # contains quotes or backslashes cannot terminate the literal early.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    spaced_title = normalizeTitle(title)

    # Boolean-mode operator characters occurring in a title would be parsed
    # as search syntax rather than text, so treat them as word separators
    # before prefixing each remaining word with '+' (word is required).
    searchable = spaced_title
    for operator in '+-<>()~*"@':
        searchable = searchable.replace(operator, ' ')
    words = searchable.split()
    if not words:
        # Nothing left to search for; an empty boolean expression matches
        # no rows, so skip the round trip to the database.
        return []
    required_words = " ".join('+{}'.format(word) for word in words)

    query = '''
        select name, match(spaced_title) against ("%s") as score
        from Page
        where match(spaced_title) against ("%s" in boolean mode)
        order by score desc
        limit 15;
    ''' % (quote_literal(spaced_title), quote_literal(required_words))
    return [row[0].lower() for row in self.fetch_all(query)]
def test_no_namespace(self):
    """Titles without a namespace prefix: first letter is upper-cased and
    a run of underscores becomes a single space."""
    cases = (
        ("python", "Python"),
        ("python 3", "Python 3"),
        ("python__3", "Python 3"),
    )
    for raw_title, expected in cases:
        self.assertEqual(normalizeTitle(raw_title), expected)
def test_not_known_namespace(self):
    """A prefix that is not handled as a known namespace keeps the space
    after the colon; surrounding underscores are dropped and both parts
    are capitalized."""
    cases = (
        ("Category: Births", "Category: Births"),
        ("_category: births___", "Category: Births"),
    )
    for raw_title, expected in cases:
        self.assertEqual(normalizeTitle(raw_title), expected)
def test_known_namespace(self):
    """A known namespace prefix is joined to the page name without a space
    after the colon, and both parts are capitalized."""
    cases = (
        ("Template: Births", "Template:Births"),
        (" template: births_", "Template:Births"),
    )
    for raw_title, expected in cases:
        self.assertEqual(normalizeTitle(raw_title), expected)