def test_info_json(self):
        """Download the test video and verify the written annotations XML.

        A YoutubeDL instance built from ``params`` downloads TEST_ID, then
        ANNOTATIONS_FILE is parsed. Every annotation whose ``type`` attribute
        is ``text`` must carry a text that is still present in the list of
        expected texts; each match consumes one entry, and the list must be
        empty at the end.
        """
        # Work on a copy: two annotations could have the same text, so
        # matched entries are removed one at a time.
        expected = list(EXPECTED_ANNOTATIONS)
        ie = youtube_dl.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])
        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
        annoxml = None
        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
            annoxml = xml.etree.ElementTree.parse(annof)
        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
        root = annoxml.getroot()
        self.assertEqual(root.tag, 'document')
        annotationsTag = root.find('annotations')
        # find() returns None when there is no match; fail with a readable
        # message instead of an AttributeError on the next line.
        self.assertTrue(annotationsTag is not None, 'No annotations element found')
        self.assertEqual(annotationsTag.tag, 'annotations')
        annotations = annotationsTag.findall('annotation')

        # Not all the annotations have TEXT children and the annotations are
        # returned unsorted.
        for a in annotations:
            self.assertEqual(a.tag, 'annotation')
            if a.get('type') == 'text':
                textTag = a.find('TEXT')
                self.assertTrue(textTag is not None, 'Text annotation without a TEXT child')
                text = textTag.text
                # assertIn was only added in Python 2.7
                self.assertTrue(text in expected, 'Unexpected annotation text: %r' % (text,))
                # Remove the first occurrence; there could be more than one
                # annotation with the same text.
                expected.remove(text)
        # We should have seen (and removed) all the expected annotation texts.
        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
    def test_info_json(self):
        """Check the annotations XML produced by downloading TEST_ID.

        After the download, ANNOTATIONS_FILE must exist and parse as XML with
        a ``document`` root containing an ``annotations`` element. The text of
        each ``annotation`` of type ``text`` is looked up in a working copy of
        EXPECTED_ANNOTATIONS and removed from it; the copy must end up empty.
        """
        # A list (not a set) because two annotations could have the same text.
        expected = list(EXPECTED_ANNOTATIONS)
        ie = youtube_dl.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])
        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
        annoxml = None
        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
            annoxml = xml.etree.ElementTree.parse(annof)
        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
        root = annoxml.getroot()
        self.assertEqual(root.tag, 'document')
        annotationsTag = root.find('annotations')
        # A missing child makes find() return None; report that as a test
        # failure rather than crashing on the attribute access below.
        self.assertTrue(
            annotationsTag is not None,
            'Root element has no annotations child')
        self.assertEqual(annotationsTag.tag, 'annotations')
        annotations = annotationsTag.findall('annotation')

        # Not all the annotations have TEXT children and the annotations are returned unsorted.
        for a in annotations:
            self.assertEqual(a.tag, 'annotation')
            if a.get('type') != 'text':
                continue
            textTag = a.find('TEXT')
            self.assertTrue(
                textTag is not None,
                'Annotation of type text has no TEXT child')
            text = textTag.text
            # assertIn only added in python 2.7
            self.assertTrue(
                text in expected,
                'Annotation text not expected (or seen too often): %r' % (text,))
            # remove the first occurrence, there could be more than one annotation with the same text
            expected.remove(text)
        # We should have seen (and removed) all the expected annotation texts.
        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
    def test_info_json(self):
        """Download the test video and verify the info JSON and description files.

        A YoutubeDL instance built from ``params`` downloads TEST_ID. The test
        then loads INFO_JSON_FILE and compares a fixed set of metadata fields,
        and checks that DESCRIPTION_FILE contains EXPECTED_DESCRIPTION.
        """
        ie = youtube_dl.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])

        self.assertTrue(os.path.exists(INFO_JSON_FILE))
        with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
            jd = json.load(jsonf)
        self.assertEqual(jd['upload_date'], u'20121002')
        self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
        self.assertEqual(jd['id'], TEST_ID)
        self.assertEqual(jd['extractor'], 'youtube')
        # The backslash is written as an explicit escape: a lone backslash
        # before a non-escape character is an invalid escape sequence that
        # newer Python versions warn about. The resulting string is unchanged.
        self.assertEqual(jd['title'], u'''youtube-dl test video "'/\\ä↭𝕐''')
        self.assertEqual(jd['uploader'], 'Philipp Hagemeister')

        self.assertTrue(os.path.exists(DESCRIPTION_FILE))
        with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf:
            descr = descf.read()
        self.assertEqual(descr, EXPECTED_DESCRIPTION)
Beispiel #4
0
    def test_info_json(self):
        """Verify the metadata files written when downloading TEST_ID.

        Checks that INFO_JSON_FILE exists and that its upload date,
        description, id, extractor, title and uploader match the expected
        values, then that DESCRIPTION_FILE exists and holds
        EXPECTED_DESCRIPTION.
        """
        ie = youtube_dl.extractor.YoutubeIE()
        ydl = YoutubeDL(params)
        ydl.add_info_extractor(ie)
        ydl.download([TEST_ID])

        # Metadata written as JSON next to the download.
        self.assertTrue(os.path.exists(INFO_JSON_FILE))
        with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
            jd = json.load(jsonf)
        self.assertEqual(jd['upload_date'], u'20121002')
        self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
        self.assertEqual(jd['id'], TEST_ID)
        self.assertEqual(jd['extractor'], 'youtube')
        # Backslash doubled on purpose: an unescaped backslash in front of a
        # character that is not a valid escape triggers an invalid-escape
        # warning on newer Pythons. The compared value is the same.
        self.assertEqual(jd['title'], u'''youtube-dl test video "'/\\ä↭𝕐''')
        self.assertEqual(jd['uploader'], 'Philipp Hagemeister')

        # Plain-text description file.
        self.assertTrue(os.path.exists(DESCRIPTION_FILE))
        with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf:
            descr = descf.read()
        self.assertEqual(descr, EXPECTED_DESCRIPTION)
Beispiel #5
0
    def test_template(self):
        """Download ``test_case`` and verify the produced files and metadata.

        ``test_case`` comes from the enclosing scope. The test is skipped
        (prints a message and returns) when the extractor is marked as not
        working, when no output file is specified for a non-playlist case,
        or when the case carries a ``skip`` entry.

        The download is attempted up to RETRIES times; only errors whose
        underlying exception type is one of the listed network-related types
        are retried. For every (sub-)case the test then checks the output
        file, the ``.info.json`` file, the optional md5, the expected
        ``info_dict`` fields and the mandatory fields. Output files are
        removed before the download and again afterwards, even on failure.
        """
        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
        if not ie._WORKING:
            print('Skipping: IE marked as not _WORKING')
            return
        if 'playlist' not in test_case and not test_case['file']:
            print('Skipping: No output file specified')
            return
        if 'skip' in test_case:
            print('Skipping: {0}'.format(test_case['skip']))
            return

        params = self.parameters.copy()
        params.update(test_case.get('params', {}))

        ydl = YoutubeDL(params)
        # Distinct loop name so the extractor looked up above is not shadowed.
        for extractor in youtube_dl.extractor.gen_extractors():
            ydl.add_info_extractor(extractor)

        # Filenames for which the progress hook reported status 'finished'.
        finished_hook_called = set()

        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        ydl.fd.add_progress_hook(_hook)

        # A playlist case expands to its entries; otherwise the case itself.
        test_cases = test_case.get('playlist', [test_case])

        def _remove_outputs():
            """Remove the output, partial and info files of every case."""
            for case in test_cases:
                _try_rm(case['file'])
                _try_rm(case['file'] + '.part')
                _try_rm(case['file'] + '.info.json')

        _remove_outputs()
        try:
            for retry in range(1, RETRIES + 1):
                try:
                    ydl.download([test_case['url']])
                except (DownloadError, ExtractorError) as err:
                    if retry == RETRIES:
                        raise

                    # Check if the exception is not a network related one
                    if err.exc_info[0] not in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
                        raise

                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
                else:
                    break

            for tc in test_cases:
                if not test_case.get('params', {}).get('skip_download', False):
                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
                    self.assertTrue(tc['file'] in finished_hook_called)
                self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
                if 'md5' in tc:
                    md5_for_file = _file_md5(tc['file'])
                    self.assertEqual(md5_for_file, tc['md5'])
                with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                    info_dict = json.load(infof)

                expected_fields = tc.get('info_dict', {})
                for (info_field, expected) in expected_fields.items():
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
                        # Expected values prefixed with 'md5:' are compared
                        # against the md5 of the actual field value.
                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
                    else:
                        got = info_dict.get(info_field)
                        self.assertEqual(
                            expected, got,
                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                # (string values of 250 characters or more are shown as their md5).
                test_info_dict = dict(
                    (key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                    for key, value in info_dict.items()
                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
                if not all(key in expected_fields for key in test_info_dict):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
                    self.assertTrue(key in info_dict and info_dict[key])
        finally:
            _remove_outputs()