Ejemplo n.º 1
0
    def _real_extract(self, url):
        """Collect every episode returned for the URL into one playlist result.

        The playlist title is the 'series' value of the first parsed entry.

        Raises:
            ExtractorError: if the API response has no 'result' key, or if
                no entries were collected from it.
        """
        host, video_id = re.match(self._VALID_URL, url).group('host', 'id')

        response = self._call_api(host, video_id, '', note='Downloading video JSON')

        found = response.get('search-results', {})
        if 'result' not in found:
            raise ExtractorError('Playlist was not found')

        # A dict result is handled as a single episode.
        episodes = found.get('result', {})
        if isinstance(episodes, dict):
            episodes = [episodes]

        entries = [
            self._parse_mediapackage(episode.get('mediapackage', {}))
            for episode in episodes
        ]
        if not entries:
            raise ExtractorError('Playlist has no entries')

        series_title = entries[0].get('series')
        return self.playlist_result(
            entries, playlist_id=video_id, playlist_title=series_title)
Ejemplo n.º 2
0
    def _verify_video_password(self, url, video_id, webpage):
        """Submit the configured video password to the validation endpoint.

        Reads 'videopassword' from the downloader params, takes the value of
        the ``meetId`` input found in *webpage*, and posts both to the
        ``rec/validate_meet_passwd`` endpoint derived from *url*.

        Raises:
            ExtractorError: if no password is configured, or if the
                response's 'errorCode' is not 0.
        """
        video_password = self._downloader.params.get('videopassword')
        if video_password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)

        meet_id = self._search_regex(
            r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        form = {
            'id': meet_id,
            'passwd': video_password,
            'action': "viewdetailedpage",
            'recaptcha': "",
        }
        payload = urlencode_postdata(form)

        # Keep whatever precedes "zoom.us" in the URL and rebuild the path.
        url_prefix = url.partition("zoom.us")[0]
        response = self._download_json(
            url_prefix + "zoom.us/rec/validate_meet_passwd",
            video_id,
            note='Validating Password...',
            errnote='Wrong password?',
            data=payload)

        if response['errorCode'] == 0:
            return
        raise ExtractorError(
            'Login failed, %s said: %r' %
            (self.IE_NAME, response['errorMessage']))
Ejemplo n.º 3
0
    def _real_extract(self, url):
        """Resolve *url* to a single track or to a playlist of track URLs.

        Objects of type 't' are handed to ``_extract_track``; types 'a' and
        'p' become a playlist built from their tracks.

        Raises:
            ExtractorError: if the object type is none of 't', 'a', 'p'.
        """
        payload = self._get_object_from_url(url).get('result')
        kind = payload['type']

        if kind == u't':
            return self._extract_track(payload)
        if kind not in (u'a', u'p'):
            raise ExtractorError(u"Unknown object type: `{0}'".format(kind))

        # Albums and playlists: 'tracks' is either a list or a dict that
        # wraps the list under 'items'.
        tracks = payload.get('tracks', [])
        if isinstance(tracks, dict):
            tracks = tracks.get('items', [])

        entries = []
        for track in tracks:
            entries.append(
                self.url_result(track['shortUrl'], video_id=track['key']))

        return self.playlist_result(entries, payload['key'], payload['name'])
Ejemplo n.º 4
0
    def _real_extract(self, url):
        """Extract the video URL, thumbnail and title from a video page.

        The file id and server id are read from the player's
        ``fo.addVariable("file", ...)`` / ``fo.addVariable("s", ...)`` calls.
        The title is the 'keywords' meta value with a fixed site-wide suffix
        removed, or '' when that suffix (or the meta tag) is absent.

        Raises:
            ExtractorError: if the player variables are not found in the
                page (reported as an expected error).
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        # Both fragments are raw strings so that \s, \. and \d reach the
        # regex engine without relying on invalid string escapes.
        mobj = re.search(
            r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
            r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
        if mobj is None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        file_id = mobj.group('fileid')
        server_id = mobj.group('serverid')

        KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
        # Fall back to '' so a missing meta tag does not crash on None.
        keywords = self._html_search_meta('keywords', webpage, 'title') or ''
        title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(
            KEYWORDS_SUFFIX) else ''

        video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
        thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)

        return {
            'id': file_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title
        }
Ejemplo n.º 5
0
    def _real_initialize(self):
        """Create ``self.rdio`` and authenticate it.

        Stored state for the username (authorization key and cookies) is
        applied to the new session when present. If the session still has no
        authorization key afterwards, it signs in and the resulting state is
        saved back to storage.

        Raises:
            ExtractorError: if the username or the password is not set.
        """
        params = self._downloader.params
        username = params.get('username')
        password = params.get('password')
        if not username or not password:
            raise ExtractorError(u"Please specify your Rdio credentials")

        store = storage_load()
        saved = store.load(username)

        self.rdio = RdioSession()
        if saved:
            self.rdio._authorization_key = saved.get('authorization_key')
            self.rdio.cookies = requests.cookies.cookiejar_from_dict(
                saved.get('cookies', {}))

        if self.rdio._authorization_key:
            return

        # No usable stored key: sign in, then persist the fresh state.
        self.rdio.sign_in(username, password)
        state = {
            'cookies': dict(self.rdio.cookies),
            'authorization_key': self.rdio._authorization_key,
        }
        store.save(username, state)
Ejemplo n.º 6
0
    def _real_extract(self, url):
        """Parse the single media package returned for the URL.

        Raises:
            ExtractorError: if the API response has no 'result' key, or if
                the result is not a dict.
        """
        host, video_id = re.match(self._VALID_URL, url).group('host', 'id')

        response = self._call_api(host, video_id, '', note='Downloading video JSON')

        found = response.get('search-results', {})
        if 'result' not in found:
            raise ExtractorError('Video was not found')

        result = found.get('result', {})
        if isinstance(result, dict):
            return self._parse_mediapackage(result.get('mediapackage', {}))

        # Anything other than a dict is rejected as an unexpected multi-result.
        raise ExtractorError('More than one video was unexpectedly returned.')
Ejemplo n.º 7
0
    def _real_extract(self, url):
        """Extract the HLS stream and basic metadata of a pin.

        The page's ``initial-state`` JSON is parsed and the response named
        "PinResource" is used as the data source. The stream is taken from
        the "V_HLSV4" entry of the pin's video list.

        Raises:
            ExtractorError: if no "PinResource" response is present, or if
                no stream URL can be found in it.
        """
        video_id = self._match_id(url)
        clean_url = re.search(self._VALID_URL, url).group(0)

        webpage = self._download_webpage(clean_url, video_id)

        pin_info_json = self._search_regex(
            r"<script id=\"initial-state\" type=\"application/json\">(.+?)</script>",
            webpage,
            "Pin data JSON",
        )
        pin_info_full = json.loads(pin_info_json)
        pin_info = next(
            (r for r in pin_info_full["resourceResponses"]
             if r["name"] == "PinResource"),
            None,
        )
        if not pin_info:
            raise ExtractorError("Can't find Pin data")

        pin_data = pin_info["response"]["data"]

        # Any level may be absent or None; `or {}` keeps the lookups safe so
        # a missing stream ends in the ExtractorError below rather than in an
        # AttributeError on None.
        video_list = (pin_data.get("videos") or {}).get("video_list") or {}
        video_data = video_list.get("V_HLSV4") or {}
        video_url = video_data.get("url")
        if not video_url:
            raise ExtractorError("Can't find a video stream URL")
        video_thumb = video_data.get("thumbnail")

        # A missing or blank title falls back to a fixed placeholder.
        title = (pin_data.get("title") or "").strip() or "pinterest_video"
        pinner = pin_data.get("pinner") or {}
        uploader = pinner.get("full_name") or pinner.get("username")

        return {
            "id": video_id,
            "title": title,
            "description": self._og_search_description(webpage),
            "uploader": uploader,
            "url": video_url,
            "ext": "mp4",
            "manifest_url": video_url,
            "thumbnail": video_thumb,
        }
Ejemplo n.º 8
0
    def test_create__extract_failed__fail(self, podcast, episode_data, user, mocked_youtube, dbs):
        """A failing extract_info surfaces as YoutubeFetchError carrying the error text."""
        ydl_error = ExtractorError("Something went wrong here", video_id=episode_data["source_id"])
        mocked_youtube.extract_info.side_effect = ydl_error
        expected_details = f"Extracting data for new Episode failed: {ydl_error}"

        creator = EpisodeCreator(
            dbs,
            podcast_id=podcast.id,
            source_url=episode_data["watch_url"],
            user_id=user.id,
        )
        with pytest.raises(YoutubeFetchError) as raised:
            await_(creator.create())

        assert raised.value.details == expected_details
Ejemplo n.º 9
0
    def _get_object_from_url(self, url):
        """Fetch the API payload describing the object behind *url*.

        Calls ``getObjectFromUrl`` (with tracks included) and returns the
        whole decoded JSON payload; the object itself (track, album or
        playlist) sits under the payload's 'result' key.

        Raises:
            ExtractorError: if the payload has no truthy 'result'. The
                payload's 'message' is used as the error text when present.
        """
        response = self.rdio.api_call('getObjectFromUrl',
                                      url=url,
                                      extras=['tracks'],
                                      referer=url)
        payload = response.json()

        # Validate before returning so API errors are reported here instead
        # of surfacing later as a failed lookup on a missing result.
        if not payload.get('result'):
            raise ExtractorError(payload.get('message', u"Unknown error"))

        # The full payload is returned (not payload['result']) to keep the
        # return shape unchanged for callers.
        return payload
Ejemplo n.º 10
0
 def test_create__same_episode__extract_failed__ok(
     self, podcast, episode, user, mocked_youtube, dbs
 ):
     """Creating an episode for an existing source succeeds even when extract_info raises.

     The new episode must be a distinct record that shares source_id and
     watch_url with the existing one.
     """
     mocked_youtube.extract_info.side_effect = ExtractorError("Something went wrong here")
     new_podcast = await_(Podcast.async_create(dbs, **get_podcast_data()))
     episode_creator = EpisodeCreator(
         dbs,
         podcast_id=new_podcast.id,
         source_url=episode.watch_url,
         user_id=user.id,
     )
     new_episode: Episode = await_(episode_creator.create())
     # Check the freshly created object; the `episode` fixture is always set.
     assert new_episode is not None
     assert new_episode.id != episode.id
     assert new_episode.source_id == episode.source_id
     assert new_episode.watch_url == episode.watch_url
Ejemplo n.º 11
0
 def _real_extract(self, url):
     """Return a stub info dict for the id matched in *url*.

     Id '0' raises ExtractorError; id '2' gets an additional 'extra' format
     pointing at TEST_URL.
     """
     video_id = self._match_id(url)
     if video_id == '0':
         raise ExtractorError('foo')

     formats = [{'format_id': 'default', 'url': 'url:'}]
     if video_id == '2':
         formats.append({'format_id': 'extra', 'url': TEST_URL})

     info = {'id': video_id}
     info['title'] = 'Video %s' % video_id
     info['formats'] = formats
     return info
Ejemplo n.º 12
0
    def _get_playback_info_through_http(self, key, type=u'mp3-high'):
        """Request playback info for *key* and return its stream URL.

        Returns:
            A dict with a single 'url' entry taken from result['surl'].

        Raises:
            ExtractorError: if the response has no truthy 'result'. The
                response's 'message' is included when present.
        """
        response = self.rdio.api_call(
            'getPlaybackInfo',
            key=key,
            manualPlay=False,
            playerName='_web_{0}'.format(random_player_id()),
            requiresUnlimited=False,
            finishedAd=False,
            type=type,
        )
        info = response.json()

        if info.get('result'):
            return {'url': info['result']['surl']}

        reason = info.get('message', u"Unknown error")
        raise ExtractorError(
            u"Failed to get playback information: `{0}'".format(reason))
def _call_api(self, path, video_id, query=None):
    """Call an API endpoint with either session-cookie or guest-token auth.

    When the cookie jar holds an ``auth_token`` for ``.twitter.com``, the
    ``ct0`` cookie is sent as the CSRF token. If ``ct0`` is missing or
    expired, the whole cookie jar is cleared and the call is retried, which
    then takes the guest path. Otherwise a guest token is downloaded once,
    stored on ``self._GUEST_TOKEN`` and sent with the request.

    Args:
        path: Endpoint path appended to ``self._API_BASE``.
        video_id: Id passed through to the download helpers.
        query: Optional query parameters; defaults to an empty dict.

    Returns:
        The decoded JSON response.

    Raises:
        ExtractorError: on HTTP 403, carrying the first error message from
            the response body (marked as expected); any other
            ExtractorError is re-raised unchanged.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if query is None:
        query = {}
    headers = {
        'Authorization': AUTH,
    }
    if 'auth_token' in self._downloader.cookiejar.get_dict('.twitter.com'):
        # NOTE(review): this print and the one below look like leftover
        # debugging output; kept as-is to leave stdout behaviour unchanged.
        print('auth_token')
        ct0 = self._downloader.cookiejar._cookies.get('.twitter.com',
                                                      {}).get('/',
                                                              {}).get('ct0')
        if ct0 is None or ct0.is_expired():
            print('Expired cookies')
            # Clearing the jar removes auth_token, so the retry goes down
            # the guest-token branch instead of recursing again.
            self._downloader.cookiejar.clear()
            return self._call_api(path, video_id, query)
        headers["x-twitter-auth-type"] = "OAuth2Session"
        headers["x-csrf-token"] = ct0.value
    else:
        if not self._GUEST_TOKEN:
            self._GUEST_TOKEN = self._download_json(
                self._API_BASE + 'guest/activate.json',
                video_id,
                'Downloading guest token',
                data=b'',
                headers=headers)['guest_token']
        headers['x-guest-token'] = self._GUEST_TOKEN

    try:
        return self._download_json(self._API_BASE + path,
                                   video_id,
                                   headers=headers,
                                   query=query)
    except ExtractorError as e:
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
            raise ExtractorError(self._parse_json(
                e.cause.read().decode(), video_id)['errors'][0]['message'],
                                 expected=True)
        raise
Ejemplo n.º 14
0
 def error(msg):
     """Raise NotAvailableException for a removed video, else ExtractorError."""
     video_removed = "This video is no longer available" in msg
     if not video_removed:
         raise ExtractorError("Video Downloading failed.")
     raise NotAvailableException("notavailable")
Ejemplo n.º 15
0
    def _real_extract(self, url):
        """Extract one video's format and metadata from its mobile web page.

        The page embeds a JSON document in a ``<script id="__NEXT_DATA__">``
        tag; all metadata is read from its ``props.pageProps`` object.

        Raises:
            ExtractorError: if ``statusCode`` in the page data is not 0, or
                if the page data lists no video URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://m.tiktok.com/v/%s.html' % video_id, video_id)

        json_string = self._html_search_regex(
            [r'<script\s+id="__NEXT_DATA__"[^>]*>(.*?)</script>'], webpage,
            'next_data')
        json_data = self._parse_json(json_string, video_id)
        # try_get yields None on a missing path; normalise to {} so the
        # lookups below cannot fail on None.
        video_data = try_get(
            json_data, lambda x: x['props']['pageProps'],
            expected_type=dict) or {}

        # Check availability first so an unavailable video gives a clear
        # error instead of a KeyError/TypeError on absent metadata.
        if video_data.get('statusCode') != 0:
            raise ExtractorError('Video not available', video_id=video_id)

        video_info = try_get(
            video_data, lambda x: x['videoData']['itemInfos'], dict) or {}
        author_info = try_get(
            video_data, lambda x: x['videoData']['authorInfos'], dict) or {}
        share_info = try_get(video_data, lambda x: x['shareMeta'], dict) or {}

        # The first listed URL is used as-is. Looking up a watermark-free
        # variant (via an id embedded in the downloaded file) is disabled.
        video_url = try_get(video_info, lambda x: x['video']['urls'][0])
        if not video_url:
            raise ExtractorError(
                'Unable to find a video URL', video_id=video_id)

        unique_id = str_or_none(author_info.get('uniqueId'))
        timestamp = int_or_none(video_info.get('createTime'))
        height = try_get(
            video_info, lambda x: x['video']['videoMeta']['height'], int)
        width = try_get(
            video_info, lambda x: x['video']['videoMeta']['width'], int)

        thumbnails = [{
            'url': (video_info.get('thumbnail')
                    or self._og_search_thumbnail(webpage)),
            'width': width,
            'height': height,
        }]
        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
        }]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': (str_or_none(video_info.get('text'))
                            or str_or_none(share_info.get('desc'))),
            'comment_count': int_or_none(video_info.get('commentCount')),
            'duration': try_get(
                video_info, lambda x: x['video']['videoMeta']['duration'],
                int),
            'height': height,
            'like_count': int_or_none(video_info.get('diggCount')),
            'repost_count': int_or_none(video_info.get('shareCount')),
            'thumbnail': try_get(video_info, lambda x: x['covers'][0], str),
            'timestamp': timestamp,
            'width': width,
            'creator': str_or_none(author_info.get('nickName')),
            'uploader': unique_id,
            'uploader_id': str_or_none(author_info.get('userId')),
            # Without a unique id there is no profile URL to build.
            'uploader_url': ('https://www.tiktok.com/@' + unique_id
                             if unique_id else None),
            'thumbnails': thumbnails,
            'webpage_url': self._og_search_url(webpage),
            'ext': 'mp4',
            'formats': formats,
            'http_headers': {
                'User-Agent': 'okhttp',
            }
        }
Ejemplo n.º 16
0
 def report_warning(self, message):
     """Turn every warning into an ExtractorError so tests fail on it."""
     # Warnings are not tolerated while testing.
     failure = ExtractorError(message)
     raise failure
# Collected hostnames; a set so duplicates collapse.
results = set()
# Label handed to the extractor helpers and used as the log prefix.
script_id = 'mastodon'
# Extractor instance backed by a non-verbose fake downloader.
ie = TestIE(FakeYDL({'verbose': False}))


def sanitize_hostname(hostname):
    """Return *hostname* without trailing slashes/backslashes and trailing port."""
    # Order matters: slashes are stripped first so that a port sitting just
    # before them becomes the new end of the string.
    for trailing_pattern in (r'[/\\]+$', r':\d+$'):
        hostname = re.sub(trailing_pattern, '', hostname)
    return hostname


# .get() lets the explicit error below fire when the variable is unset;
# indexing os.environ directly would raise a bare KeyError first.
instance_social_api_key = os.environ.get('INSTANCE_SOCIAL_API_SECRET')
if not instance_social_api_key:
    raise ExtractorError('You must set INSTANCE_SOCIAL_API_SECRET to work')

# Walk the paginated instance list until the API returns no next_id.
min_id = None
while True:
    url = 'https://instances.social/api/1.0/instances/list'
    if min_id:
        url = f'{url}?min_id={min_id}'
    data = ie._download_json(
        url, script_id, note=f'Paging {min_id}, len(results)={len(results)}',
        headers={'Authorization': f'Bearer {instance_social_api_key}'})
    results.update(
        sanitize_hostname(instance['name']) for instance in data['instances'])
    min_id = data['pagination'].get('next_id')
    if not min_id:
        break
Ejemplo n.º 18
0
            script_id,
            note=
            'Scraping https://the-federation.info/peertube, len(results)=%d' %
            (len(results)),
            headers={
                'content-type': 'application/json, application/graphql',
                'accept': 'application/json, application/graphql',
            })
        for instance in data['data']['nodes']:
            results.add(sanitize_hostname(instance['host']))
        break
    except BaseException:
        continue

# Nothing collected means every source failed; stop here.
if not results:
    raise ExtractorError('no instances found')

# Encode each name with the 'idna' codec to get its ASCII form.
results = set(host.encode('idna').decode('utf8') for host in results)
ie.to_screen('%s: converted domain names to punycode, len(results)=%d' %
             (script_id, len(results)))

# Drop names that contain no dot at all.
results = set(host for host in results if '.' in host)
ie.to_screen('%s: excluded domain names without dot, len(results)=%d' %
             (script_id, len(results)))

# Drop names under these three suffixes.
excluded_suffixes = ('.ngrok.io', '.localhost.run', '.serveo.net')
results = set(
    host for host in results if not host.endswith(excluded_suffixes))