def test_get_subtitles_for_talk(self):
        '''
        Request subtitles for a known talk with the language list
        ['banana', 'fr', 'en'] and check that the result begins with the
        expected first French entry.
        '''
        talk_url = 'http://www.ted.com/talks/richard_wilkinson.html'
        # 'banana' comes first; presumably it is skipped as an unknown
        # language code so that 'fr' wins - verify against the scraper.
        requested_languages = ['banana', 'fr', 'en']
        expected_start = '''1
00:00:11,820 --> 00:00:14,820
Vous savez tous que ce que je vais dire est vrai.

2'''

        talk_json = self.__get_talk_json__(talk_url)
        subs = subtitles_scraper.get_subtitles_for_talk(
            talk_json, requested_languages, None)

        self.assertTrue(subs.startswith(expected_start))
    def test_get_subtitles_for_newest_talk(self):
        '''
        The newest talk frequently has no subtitles when it is first
        published. In that situation the scraper must return None rather
        than raise.

        Nothing is asserted here: the test passes as long as no exception
        escapes. If the newest talk does have subtitles a notice is printed,
        because the scenario of interest was not exercised.
        '''
        from rss_scraper import NewTalksRss

        def talk_date(talk):
            # Feed dates are parsed as day.month.year.
            return time.strptime(talk['date'], "%d.%m.%Y")

        def report(m1, m2):
            # Message callback handed to the scraper; echoes to stdout.
            return sys.stdout.write('%s\n%s' % (m1, m2))

        talks = list(NewTalksRss(None).get_new_talks())
        talks.sort(key=talk_date, reverse=True)
        newest_talk = talks[0]

        talk_json = self.__get_talk_json__(newest_talk['link'])
        subs = subtitles_scraper.get_subtitles_for_talk(talk_json, ['en'], report)
        if subs:
            # Single parenthesised argument: prints the same text under the
            # Python 2 print statement.
            print("Newest Talk (%s) has subtitles: test ineffective" % (newest_talk['title']))
Exemple #3
0
    def test_get_subtitles_for_talk(self):
        talk_json = self.__get_talk_json__(
            'http://www.ted.com/talks/richard_wilkinson.html')

        subs = subtitles_scraper.get_subtitles_for_talk(
            talk_json, ['banana', 'fr', 'en'], None)
        self.assertTrue(
            subs.startswith('''1
00:00:11,820 --> 00:00:14,820
Vous savez tous que ce que je vais dire est vrai.

2'''))
Exemple #4
0
    def test_get_subtitles_for_newest_talk(self):
        '''
        A freshly published talk often lacks subtitles. When that happens
        the scraper is expected to return None and must not raise.

        The test makes no assertion; it only fails if an exception escapes.
        When subtitles do exist for the newest talk, a notice is printed
        since the interesting case was not covered.
        '''
        from rss_scraper import NewTalksRss

        def published_on(entry):
            # Dates in the feed use the day.month.year layout.
            return time.strptime(entry['date'], "%d.%m.%Y")

        def echo(m1, m2):
            # Callback given to the scraper; writes both messages to stdout.
            return sys.stdout.write('%s\n%s' % (m1, m2))

        by_date_desc = sorted(
            NewTalksRss(None).get_new_talks(),
            key=published_on,
            reverse=True)
        newest_talk = by_date_desc[0]

        talk_json = self.__get_talk_json__(newest_talk['link'])
        subs = subtitles_scraper.get_subtitles_for_talk(
            talk_json, ['en'], echo)
        if not subs:
            return

        # One parenthesised argument, so the Python 2 print statement
        # emits exactly the same text.
        print("Newest Talk (%s) has subtitles: test ineffective" % (
            newest_talk['title']))
    def test_real_talk(self):
        soup = MinimalSoup(urllib.urlopen("http://www.ted.com/talks/richard_wilkinson.html").read())
        flashvars = subtitles_scraper.get_flashvars(soup)

        # TED intro, need to offset subtitles with this.
        # Used to have this at ms granularity - now only s :(
        self.assertEquals("15", flashvars["introDuration"])

        # Talk ID, need this to request subtitles.
        self.assertEquals("1253", flashvars["ti"])

        expected = set(
            [
                "sq",
                "ar",
                "hy",
                "bg",
                "ca",
                "zh-cn",
                "zh-tw",
                "hr",
                "cs",
                "da",
                "nl",
                "en",
                "fr",
                "ka",
                "de",
                "el",
                "he",
                "hu",
                "id",
                "it",
                "ja",
                "ko",
                "fa",
                "mk",
                "pl",
                "pt",
                "pt-br",
                "ro",
                "ru",
                "sr",
                "sk",
                "es",
                "th",
                "tr",
                "uk",
                "vi",
            ]
        )
        self.assertEquals(expected, set(subtitles_scraper.get_languages(soup)))

        subs = subtitles_scraper.get_subtitles_for_talk(soup, ["banana", "fr", "en"], None)
        self.assertTrue(
            subs.startswith(
                """1
00:00:15,000 --> 00:00:18,000
Vous savez tous que ce que je vais dire est vrai.

2"""
            )
        )