def fix_url(cls, url):  # 2033
     if not re.match('https?://.+', url, re.IGNORECASE):
         url = 'https://www.youtube.com/watch?v={}'.format(url)
     qs = query_url(url)
     if 'v' in qs:
         url = url.split('?')[0] + '?v={}'.format(qs['v'][0])
     return url
def get_id(url):
    for pattern in PATTERNS:
        m = re.match(pattern, url)
        if m is None:
            continue
        username = m.group('username')
        pid = m.group('pid')
        break
    else:
        username, pid = None, None
    return username, pid
 def init(self):
     if not re.match('https?://.+', self.url, re.IGNORECASE):
         self.url = 'https://www.nicovideo.jp/watch/{}'.format(self.url)
Example #4
0
 def fix_url(cls, url):
     if not re.match('https?://.+', url, re.IGNORECASE):
         url = 'https://video.fc2.com/content/{}'.format(url)
     return url
Example #5
0
 def id(self):
     mobj = re.match(_VALID_URL, self.url)
     video_id = mobj.group('id')
     anime_id = mobj.group('anime_id')
     return video_id
Example #6
0
def get_videos(url, cw=None, depth=0):
    print_ = get_print(cw)
    if utils.ui_setting:
        res_text = compatstr(utils.ui_setting.youtubeCombo_res.currentText())
        res = {
            '720p': 720,
            '1080p': 1080,
            '2K': 1440,
            '4K': 2160,
            '8K': 4320
        }[res_text]
    else:
        res = 720

    mobj = re.match(_VALID_URL, url)
    video_id = mobj.group('id')
    anime_id = mobj.group('anime_id')
    print(video_id, anime_id)
    print_ = get_print(cw)
    html = downloader.read_html(url, methods={'requests'})
    soup = Soup(html)
    title = soup.find('h1').attrs['title'].strip()
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    p = get_page(url)
    if p is None:
        p = 1
    print('page:', p)
    if p > 1:
        pages = get_pages(html)
        cid = pages[(p - 1)]['cid']
    else:
        cid = re.findall('\\bcid(?:["\\\']:|=)(\\d+)', html)[0]
    print_('cid: {}'.format(cid))
    headers = {'Referer': url}
    entries = []

    RENDITIONS = [
        'qn={}&quality={}&type='.format(qlt, qlt) for qlt in RESOLS.keys()
    ]  # + ['quality=2&type=mp4']

    for num, rendition in enumerate(RENDITIONS, start=1):
        print('####', num, rendition)
        payload = 'appkey=%s&cid=%s&otype=json&%s' % (_APP_KEY, cid, rendition)
        sign = hashlib.md5(
            (payload + _BILIBILI_KEY).encode('utf-8')).hexdigest()
        url_json = 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (
            payload, sign)
        s_json = downloader.read_html(url_json)
        print(s_json[:1000])
        video_info = json.loads(s_json)
        if not video_info:
            continue
        if 'durl' not in video_info:
            print('#### error', num)
            if num < len(RENDITIONS):
                continue
            msg = video_info.get('message')
            if msg:
                raise Exception(msg)
        quality = video_info['quality']
        resolution = get_resolution(quality)
        s = (u'resolution: {}').format(resolution)
        print_(s)

        # 2184
        if int(re.find('([0-9]+)p', resolution)) > res:
            print_('skip resolution')
            continue

        for idx, durl in enumerate(video_info['durl']):
            # 1343
            if idx == 0:
                size = downloader.get_size(durl['url'], referer=url)
                if size < 1024 * 1024 and depth == 0:
                    print_('size is too small')
                    return get_videos(url, cw, depth + 1)

            formats = [{
                'url': durl['url'],
                'filesize': int_or_none(durl['size'])
            }]
            for backup_url in durl.get('backup_url', []):
                formats.append({
                    'url':
                    backup_url,
                    'preference':
                    -2 if 'hd.mp4' in backup_url else -3
                })

            for a_format in formats:
                a_format.setdefault('http_headers',
                                    {}).update({'Referer': url})

            entries.append({
                'id': '%s_part%s' % (video_id, idx),
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats
            })

        break

    videos = []
    for entry in entries:
        url_video = entry['formats'][0]['url']
        video = Video(url_video, url, video_id, len(videos))
        videos.append(video)

    info = {'title': clean_title(title), 'url_thumb': url_thumb}
    return (videos, info)
 def init(self):
     if not re.match('https?://.+', self.url, re.IGNORECASE):
         self.url = 'https://tv.naver.com/v/{}'.format(self.url)
Example #8
0
 def url_or_none(url):
     if not url or not isinstance(url, str):
         return None
     url = url.strip()
     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None