def test_is_page_candidate_fuzzy_match(self):
    """Check fuzzy matching of a page title against a song title and artist.

    Each case pairs an HTML page title with the value that
    ``lyrics.is_page_candidate`` is expected to return for the song
    "Lady Madonna" by "The Beatles".  The URL is passed as the assertion
    message so a failure reports which page was being checked.
    """
    url = u"http://www.example.com/lazy_madonna_beatles"
    title = u"Lady Madonna"
    artist = u"The Beatles"

    cases = (
        # very small diffs (typo) are ok
        (u"example.com | lazy madonna lyrics by the beatles", True),
        # reject different title
        (u"example.com | busy madonna lyrics by the beatles", False),
        # (title, artist) != (artist, title)
        (u"example.com | the beatles lyrics by Lazy Madonna", False),
    )
    for page_title, expected in cases:
        outcome = lyrics.is_page_candidate(url, page_title, title, artist)
        self.assertEqual(outcome, expected, url)
def test_is_page_candidate_fuzzy_match(self):
    """Check HTML page title matching when the title is not an exact match.

    The song title and artist come from ``self.source``; the page titles
    are hand-written variations around them.
    """
    src = self.source
    url = src['url'] + src['path']

    def matches(page_title):
        # Song infos are looked up on every call, as in a direct call.
        return lyrics.is_page_candidate(url, page_title,
                                        src['title'], src['artist'])

    # A near-miss title (e.g. 'beats' vs 'beets') with the same artist
    # must still be accepted.
    self.assertEqual(matches(u'example.com | Beats song by John doe'),
                     True, url)

    # A clearly different title must be rejected.
    self.assertEqual(matches(u'example.com | seets bong lyrics by John doe'),
                     False, url)
def test_is_page_candidate_fuzzy_match(self):
    """Exercise ``lyrics.is_page_candidate`` with near-miss page titles.

    One title with a small typo must be accepted; a title with a
    different word and a title with artist and song swapped must both
    be rejected.  The URL doubles as the assertion failure message.
    """
    url = u'http://www.example.com/lazy_madonna_beatles'
    title, artist = u'Lady Madonna', u'The Beatles'

    # very small diffs (typo) are ok
    typo_title = u'example.com | lazy madonna lyrics by the beatles'
    accepted = lyrics.is_page_candidate(url, typo_title, title, artist)
    self.assertEqual(accepted, True, url)

    # reject different title
    other_title = u'example.com | busy madonna lyrics by the beatles'
    accepted = lyrics.is_page_candidate(url, other_title, title, artist)
    self.assertEqual(accepted, False, url)

    # (title, artist) != (artist, title)
    swapped_title = u'example.com | the beatles lyrics by Lazy Madonna'
    accepted = lyrics.is_page_candidate(url, swapped_title, title, artist)
    self.assertEqual(accepted, False, url)
def test_is_page_candidate_exact_match(self):
    """Check that every entry of ``self.sourcesOk`` is seen as a candidate.

    For each source the page is retrieved with ``lyrics.fetch_url``, only
    its ``<title>`` element is parsed, and ``lyrics.is_page_candidate`` is
    expected to return True for the source's title and artist.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    for source in self.sourcesOk:
        page_url = unicode(source["url"] + source["path"])
        markup = lyrics.fetch_url(page_url)
        # Restrict parsing to the <title> tag; nothing else is needed.
        title_only = SoupStrainer("title")
        soup = BeautifulSoup(markup, "html.parser", parse_only=title_only)
        page_title = soup.title.string
        is_candidate = lyrics.is_page_candidate(
            page_url, page_title, source["title"], source["artist"])
        self.assertEqual(is_candidate, True, page_url)
def test_is_page_candidate(self):
    """Check that pages listed in ``self.sourcesOk`` are seen as candidates.

    Pages whose parsed document has no usable ``title`` are skipped
    rather than failed.
    """
    for source in self.sourcesOk:
        page_url = unicode(source['url'] + source['path'])
        soup = BeautifulSoup(lyrics.fetch_url(page_url))
        # Only pages with a title can be checked; others are ignored.
        if soup.title:
            is_candidate = lyrics.is_page_candidate(
                page_url, soup.title.string,
                source['title'], source['artist'])
            self.assertEqual(is_candidate, True, page_url)
def test_is_page_candidate(self):
    """Verify ``lyrics.is_page_candidate`` accepts each known-good source.

    Iterates over ``self.sourcesOk``, parses the page obtained from
    ``lyrics.fetch_url`` and compares its title with the source's song
    title and artist.  Sources without a page title are skipped.
    """
    for entry in self.sourcesOk:
        target = unicode(entry['url'] + entry['path'])
        document = BeautifulSoup(lyrics.fetch_url(target))
        if not document.title:
            # Nothing to compare against; move on to the next source.
            continue
        page_title = document.title.string
        verdict = lyrics.is_page_candidate(target, page_title,
                                           entry['title'], entry['artist'])
        self.assertEqual(verdict, True, target)
def test_is_page_candidate(self):
    """Check that each source in ``self.sourcesOk`` is a page candidate.

    Only the ``<title>`` element of each page is parsed; its text is
    passed to ``lyrics.is_page_candidate`` together with the source's
    title and artist, and the result must be True.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    for source in self.sourcesOk:
        page_url = unicode(source['url'] + source['path'])
        content = lyrics.fetch_url(page_url)
        # Parse nothing but the <title> tag of the page.
        strainer = SoupStrainer('title')
        parsed = BeautifulSoup(content, "html.parser", parse_only=strainer)
        result = lyrics.is_page_candidate(page_url, parsed.title.string,
                                          source['title'], source['artist'])
        self.assertEqual(result, True, page_url)
def test_is_page_candidate_exact_match(self):
    """Check HTML page title matching for the page of ``self.source``.

    The page's ``<title>`` text, as parsed from the fetched page, is
    expected to be accepted by ``lyrics.is_page_candidate`` for the
    source's title and artist.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    source = self.source
    page_url = unicode(source['url'] + source['path'])
    markup = lyrics.fetch_url(page_url)
    # Only the <title> tag is needed, so restrict parsing to it.
    title_only = SoupStrainer('title')
    soup = BeautifulSoup(markup, "html.parser", parse_only=title_only)
    page_title = soup.title.string
    is_candidate = lyrics.is_page_candidate(page_url, page_title,
                                            source['title'], source['artist'])
    self.assertEqual(is_candidate, True, page_url)