def test_get_subtitles_for_talk(self):
    # Requests subtitles for a fixed talk page using the language list
    # ['banana', 'fr', 'en'] and checks that the returned text begins with
    # the expected French first cue.
    talk_url = 'http://www.ted.com/talks/richard_wilkinson.html'
    requested_languages = ['banana', 'fr', 'en']
    # NOTE(review): subtitle cues are usually newline-separated; confirm the
    # whitespace inside this literal matches what the scraper really emits.
    expected_prefix = '''1 00:00:11,820 --> 00:00:14,820 Vous savez tous que ce que je vais dire est vrai. 2'''

    talk_data = self.__get_talk_json__(talk_url)
    # No callback is supplied as the third argument.
    subtitles = subtitles_scraper.get_subtitles_for_talk(talk_data, requested_languages, None)

    self.assertTrue(subtitles.startswith(expected_prefix))
def test_get_subtitles_for_newest_talk(self):
    '''
    The most recently published talk frequently has no subtitles yet.
    Fetching them in that situation has to come back with None rather
    than raising.
    '''
    from rss_scraper import NewTalksRss

    def published_on(talk):
        # Feed dates are parsed as day.month.year.
        return time.strptime(talk['date'], "%d.%m.%Y")

    talks_newest_first = sorted(NewTalksRss(None).get_new_talks(), key=published_on, reverse=True)
    latest = talks_newest_first[0]

    talk_data = self.__get_talk_json__(latest['link'])
    subtitles = subtitles_scraper.get_subtitles_for_talk(
        talk_data,
        ['en'],
        # Callback receives two messages and echoes both to stdout.
        lambda m1, m2: sys.stdout.write('%s\n%s' % (m1, m2)),
    )

    if not subtitles:
        # Nothing came back and nothing was raised: the case under test.
        return

    # Subtitles already exist, so the "no subtitles" path was not exercised.
    print("Newest Talk (%s) has subtitles: test ineffective" % (latest['title']))
def test_get_subtitles_for_talk(self):
    # Fetches the talk data for a fixed talk URL, asks for subtitles with the
    # language list ['banana', 'fr', 'en'] (no callback), and verifies the
    # result starts with the expected French opening cue.
    page_json = self.__get_talk_json__('http://www.ted.com/talks/richard_wilkinson.html')
    languages = ['banana', 'fr', 'en']
    srt_text = subtitles_scraper.get_subtitles_for_talk(page_json, languages, None)

    # NOTE(review): confirm the whitespace inside this literal against the
    # scraper's actual output; cue fields are normally on separate lines.
    opening_cue = '''1 00:00:11,820 --> 00:00:14,820 Vous savez tous que ce que je vais dire est vrai. 2'''
    has_expected_start = srt_text.startswith(opening_cue)
    self.assertTrue(has_expected_start)
def test_get_subtitles_for_newest_talk(self):
    '''
    A freshly released talk usually ships without subtitles at first.
    In that case the scraper is required to return None instead of
    raising an exception.
    '''
    from rss_scraper import NewTalksRss

    def echo(m1, m2):
        # Writes both messages handed to the callback to stdout.
        return sys.stdout.write('%s\n%s' % (m1, m2))

    candidates = NewTalksRss(None).get_new_talks()
    # Sort by the parsed day.month.year date, latest first.
    by_date_desc = sorted(
        candidates,
        key=lambda entry: time.strptime(entry['date'], "%d.%m.%Y"),
        reverse=True)
    newest = by_date_desc[0]

    newest_json = self.__get_talk_json__(newest['link'])
    found = subtitles_scraper.get_subtitles_for_talk(newest_json, ['en'], echo)

    if found:
        # The talk already has subtitles, so the missing-subtitles path
        # was not actually exercised on this run.
        print("Newest Talk (%s) has subtitles: test ineffective" % (newest['title']))
def test_real_talk(self):
    # Checks the scraper against a live talk page: the flashvars it extracts,
    # the set of subtitle languages it reports, and the start of the
    # subtitles it returns for the language list ["banana", "fr", "en"].
    response = urllib.urlopen("http://www.ted.com/talks/richard_wilkinson.html")
    try:
        html = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    soup = MinimalSoup(html)

    flashvars = subtitles_scraper.get_flashvars(soup)
    # TED intro, need to offset subtitles with this.
    # Used to have this at ms granularity - now only s :(
    self.assertEqual("15", flashvars["introDuration"])
    # Talk ID, need this to request subtitles.
    self.assertEqual("1253", flashvars["ti"])

    expected = set(
        [
            "sq", "ar", "hy", "bg", "ca", "zh-cn", "zh-tw", "hr", "cs",
            "da", "nl", "en", "fr", "ka", "de", "el", "he", "hu", "id",
            "it", "ja", "ko", "fa", "mk", "pl", "pt", "pt-br", "ro", "ru",
            "sr", "sk", "es", "th", "tr", "uk", "vi",
        ]
    )
    # Order is irrelevant, so compare as sets.
    self.assertEqual(expected, set(subtitles_scraper.get_languages(soup)))

    subs = subtitles_scraper.get_subtitles_for_talk(soup, ["banana", "fr", "en"], None)
    # NOTE(review): confirm the whitespace inside this literal against the
    # scraper's actual output; cue fields are normally on separate lines.
    self.assertTrue(
        subs.startswith(
            """1 00:00:15,000 --> 00:00:18,000 Vous savez tous que ce que je vais dire est vrai. 2"""
        )
    )