Python matchall Examples, ykdl.util.match.matchall Python Examples

Example #1

0

Show file

File: fun.py Project: netlovehf/ykdl

def fetch_mozecname(vid):
    """Fill the module-level ``mozecname`` from fun.tv's mobile scripts.

    Fetches the mobile play page for *vid*, downloads every script referenced
    in its ``<head>`` whose path matches ``/static/js/v12/pkg/m...js``, and
    collects the string arguments of the encoded
    ``document.mozEcName.push("...")`` calls found in each script's payload.
    The collected strings are finally turned into a dict mapping
    ``int(last character)`` to ``int(remaining characters, 16)``.

    Nothing is returned; the result replaces the global ``mozecname``.  The
    function returns immediately when ``mozecname`` already has four entries.

    Args:
        vid: Video id appended to the play-page URL.  The original author
            noted that the result does not appear to depend on it.
    """
    # vid: seems non-interrelated with result

    global mozecname
    if len(mozecname) == 4:
        return

    digits = list(
        '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

    # NOTE: the helpers below read ``base``, ``keys_list`` and ``keys_dict``
    # from the enclosing scope; those names are (re)bound inside the loop
    # further down, before any helper is called.

    def i2b(d):
        """Render the integer *d* in the current ``base`` using ``digits``."""
        if base == 10:
            return str(d)
        b = []
        while d:
            d, m = divmod(d, base)
            b.append(digits[m])
        return ''.join(b[::-1]) or '0'

    def b2i(text):
        """Parse *text* as an integer in the current ``base``."""
        try:
            i = int(text, base)
        except ValueError:
            # int() rejects the text (e.g. bases above 36); decode manually
            # with the 62-character alphabet.
            i = p = 0
            for d in text[::-1]:
                i += digits.index(d) * base**p
                p += 1
        return i

    def encrypt(text):
        '''fake, only for existed texts'''
        return i2b(keys_dict[text])

    def decrypt(text):
        # An empty key means the token stands for itself.
        return keys_list[b2i(text)] or text

    html = get_content('https://m.fun.tv/vplay/?vid=' + vid)
    for path in matchall(html[:html.find('</head>')],
                         r'src="(/static/js/v12/pkg/m\w{4}_v12_\w{9}.js)"'):
        js = get_content('https://m.fun.tv' + path).strip()
        # Payload, numeric base, a count (unused), the key table and its
        # separator -- presumably a "packer"-style script; confirm on a sample.
        crypt, base, _, keys, sep = re.search(
            r"}\('(.+?)[^\\]',(\d+),(\d+),'(.+?)'\.split\('(.)'", js).groups()
        base = int(base)
        keys_list = keys.split(sep)
        keys_dict = {k: i for i, k in enumerate(keys_list)}
        # Build the encoded form of: document.mozEcName.push("<value>
        pattern = r'\.'.join(
            encrypt(text)
            for text in ['document', 'mozEcName', 'push']) + r'\("(\w+)'
        mozecname += [decrypt(text) for text in matchall(crypt, pattern)]
    site.logger.debug('mozEcName: %s', mozecname)
    # Last character is the key, the rest is a hexadecimal value.
    mozecname = {int(m[-1]): int(m[:-1], 16) for m in mozecname}

Example #2

0

Show file

File: video.py Project: yslinwe/bilibiliupload

    def get_vid_title(self):
        """Resolve the cid and title of the page addressed by ``self.url``.

        ``self.url`` is first rewritten to the canonical
        ``https://www.bilibili.com/av<id>/`` form (with ``?p=<page>`` for any
        page other than the first).  When ``self.vid`` is not set yet, the
        page is downloaded and its ``cid``/``page``/``part`` entries are
        scanned for the requested page.

        Returns:
            A ``(vid, title)`` tuple.  ``vid`` falls back to the current
            ``self.vid`` when no entry matches; ``title`` is ``None`` when the
            page was not downloaded because ``self.vid`` was already set.
        """
        av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
        page_index = '1'
        if "#page=" in self.url or "?p=" in self.url or 'index_' in self.url:
            page_index = match1(self.url, r'(?:#page|\?p)=(\d+)',
                                r'index_(\d+)\.')
        if page_index == '1':
            self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
        else:
            self.url = 'https://www.bilibili.com/av{}/?p={}'.format(
                av_id, page_index)

        # Bind both results up front so the return below cannot hit an
        # UnboundLocalError when the lookup is skipped or finds nothing.
        vid = self.vid
        title = None
        if not self.vid:
            html = get_content(self.url)
            title = match1(html, '"title":"([^"]+)', '<h1 title="([^"]+)',
                           '<title>([^<]+)').strip()
            video_list = matchall(
                html,
                [r'"cid":(\d+),"page":(\d+),"from":"[^"]+","part":"([^"]*)",'])
            for cid, page, part in video_list:
                if page == page_index:
                    vid = cid
                    # Multi-part videos get "<title> - <page> - <part>".
                    if len(video_list) > 1:
                        title = u'{} - {} - {}'.format(title, page, part)
                    elif part:
                        title = u'{} - {}'.format(title, part)
                    break

        return vid, title

Example #3

0

Show file

File: video.py Project: wwqgtxx/ykdl

 def prepare_list(self):
     """Return one URL per page of the video addressed by ``self.url``.

     ``self.url`` is rewritten to ``https://www.bilibili.com/av<id>/`` and
     that page is downloaded; every ``"page":<n>,`` occurrence yields one
     ``?p=<n>`` URL.  Returns ``None`` when no page entry is found.
     """
     av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
     self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
     html = get_content(self.url)
     video_list = matchall(html, [r'"page":(\d+),'])
     if video_list:
         return ['https://www.bilibili.com/av{}/?p={}'.format(av_id, p) for p in video_list]

Example #4

0

Show file

File: video.py Project: PureTV/ykdl

def get_realurl(url):
    """Return the real location behind *url*.

    When ``get_location`` reports something other than *url* itself, that
    location is returned.  Otherwise the page body is downloaded and the
    second ``CDATA[...]`` payload found in it is returned.

    Raises:
        IndexError: presumably, when the body holds fewer than two CDATA
            sections (assumes ``matchall`` returns a list -- confirm).
    """
    location = get_location(url)
    if location != url:
        return location
    html = get_content(url)
    # Index 1 selects the second CDATA payload, not the first.
    return matchall(html, [r'CDATA\[([^\]]+)'])[1]

Example #5

0

Show file

File: gongkaike.py Project: netlovehf/ykdl

    def prepare(self):
        """Build a ``VideoInfo`` with a single ``current`` mp4 stream.

        The video id is taken from ``self.vid``, else from ``self.url``
        (``#<id>`` fragment or ``/<id>.shtml`` path), else from the downloaded
        page (``"vid": "..."`` / ``vid: "..."``).  The id then selects the
        ifengimg XML descriptor, from which the title and play URLs are read.

        Returns:
            The populated ``VideoInfo``.
        """
        info = VideoInfo(self.name)
        if not self.vid:
            self.vid = match1(self.url, r'#([a-zA-Z0-9\-]+)',
                              r'/([a-zA-Z0-9\-]+).shtml')
        if not self.vid:
            html = get_content(self.url)
            self.vid = match1(html, '"vid": "([^"]+)', 'vid: "([^"]+)')

        # Descriptor path: <second-to-last char>/<last two chars>/<full id>.xml
        xml = get_content(
            'http://vxml.ifengimg.com/video_info_new/{}/{}/{}.xml'.format(
                self.vid[-2], self.vid[-2:], self.vid))

        info.title = match1(xml, 'SE_Title="([^"]+)')
        urls = matchall(xml, 'playurl="([^"]+)')
        # Drop the first 7 characters of each play URL (presumably the
        # "http://" scheme -- confirm) and route it through ips.ifeng.com.
        urls = ['http://ips.ifeng.com/' + u[7:] for u in urls]
        info.stream_types.append('current')
        info.streams['current'] = {
            'container': 'mp4',
            'video_profile': 'current',
            'src': urls,
            'size': 0
        }

        return info

Example #6

0

Show file

File: iqiyi.py Project: wanmeiw/ykdl

    def prepare(self):
        """Resolve ``(tvid, vid)`` and build a ``VideoInfo`` from the VPS data.

        Id resolution is attempted in this order while ``self.vid`` is unset:

        1. a ``curid=<tvid>_<vid>`` pair in ``self.url`` (the title is then
           read from the page named by the mixer API),
        2. the ``:video-info='...'`` JSON attribute of the downloaded page,
        3. assorted ``tvid`` / ``vid`` patterns in the downloaded page.

        Returns:
            The populated ``VideoInfo`` with its stream types ordered by
            ``self.ids``.

        Raises:
            AssertionError: when the VPS response code is not ``'A00000'``.
        """
        info = VideoInfo(self.name)
        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]
                info_u = 'http://mixer.video.iqiyi.com/jp/mixin/videos/' + self.vid[
                    0]
                mixin = get_content(info_u)
                # Strip the JavaScript assignment prefix to get plain JSON.
                mixin_json = json.loads(mixin[len('var tvInfoJs='):])
                real_u = mixin_json['url']
                real_html = get_content(real_u)
                info.title = match1(real_html, '<title>([^<]+)').split('-')[0]

        if self.url and not self.vid:
            html = get_content(self.url)
            video_info = match1(html, ":video-info='(.+?)'")
            if video_info:
                video_info = json.loads(video_info)
                self.vid = str(video_info['tvId']), str(video_info['vid'])
                info.title = video_info['name']
            else:

                tvid = match1(html, 'data-player-tvid="([^"]+)"',
                              'tvid="(.+?)"', 'tvId:([^,]+)',
                              r'''param\['tvid'\]\s*=\s*"(.+?)"''',
                              r'"tvid":\s*"(\d+)"')
                videoid = match1(html, 'data-player-videoid="([^"]+)"',
                                 'vid="(.+?)"', 'vid:"([^"]+)',
                                 r'''param\['vid'\]\s*=\s*"(.+?)"''',
                                 r'"vid":\s*"(\w+)"')
                self.vid = (tvid, videoid)
                info.title = match1(html, '<title>([^<]+)').split('-')[0]

        tvid, vid = self.vid
        vps_data = getvps(tvid, vid)
        assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
        url_prefix = vps_data['data']['vp']['du']
        stream = vps_data['data']['vp']['tkl'][0]
        vs_array = stream['vs']
        for vs in vs_array:
            bid = vs['bid']
            fs_array = vs['fs']
            real_urls = []
            # Each segment needs one extra request to obtain its download URL.
            for seg_info in fs_array:
                url = url_prefix + seg_info['l']
                json_data = json.loads(get_content(url))
                down_url = json_data['l']
                real_urls.append(down_url)
            stream = self.vd_2_id[bid]
            info.stream_types.append(stream)
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': 'flv',
                'src': real_urls,
                'size': 0
            }
        info.stream_types = sorted(info.stream_types, key=self.ids.index)
        return info

Example #7

0

Show file

 def prepare(self):
     """Build a ``VideoInfo`` with a single ``current`` mp4 stream.

     The ``<omsid>`` is first looked up through the vxml descriptor named by
     the ``a/<id>.`` part of ``self.url``.  If that yields ``'0'`` or nothing,
     the page itself is downloaded and searched for an ``xmlid=`` or
     ``embed/`` reference to a second descriptor.

     Returns:
         The populated ``VideoInfo``.

     Raises:
         AssertionError: when no usable ``<omsid>`` could be found.
     """
     info = VideoInfo(self.name)
     id1 = match1(self.url, r'a/([^\.]+)\.')
     api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
     video_data1 = get_content(api1)
     self.vid = match1(video_data1, '<omsid>([^<]+)<')
     if self.vid == '0' or not self.vid:
         html = get_content(self.url)
         # Only the embed/ form has its underscores replaced by slashes.
         id1 = match1(html, 'xmlid=([^"]+)') or match1(
             html, 'embed/([^"]+)').replace('_', '/')
         api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
         video_data1 = get_content(api1)
         self.vid = match1(video_data1, '<omsid>([^<]+)<')
     assert self.vid != '0' and self.vid, self.url + ': Not a video news link!'
     api2 = 'http://vapi.kankanews.com/index.php?app=api&mod=public&act=getvideo&id={}'.format(
         self.vid)
     video_data2 = get_content(api2)
     urls = matchall(video_data2, [r'<videourl><!\[CDATA\[([^\]]+)'])
     info.title = match1(video_data2, r'<otitle><!\[CDATA\[([^\]]+)')
     info.stream_types.append('current')
     info.streams['current'] = {
         'container': 'mp4',
         'video_profile': 'current',
         'src': urls,
         'size': 0
     }
     return info

Example #8

0

Show file

 def prepare_list(self):
     """List the per-page URLs of the video named by ``self.url``.

     Rewrites ``self.url`` to ``https://www.bilibili.com/av<id>/``, downloads
     it, and maps each ``"page":<n>,`` occurrence to a ``?p=<n>`` URL.
     Returns ``None`` when the page contains no such entry.
     """
     av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
     self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
     html = get_content(self.url)
     video_list = matchall(html, [r'"page":(\d+),'])
     if video_list:
         return ['https://www.bilibili.com/av{}/?p={}'.format(av_id, p) for p in video_list]

Example #9

0

Show file

def get_realurl(url):
    """Resolve *url* to the location it finally points at.

    A location reported by ``get_location`` that differs from *url* is
    returned directly.  Otherwise the body of *url* is fetched and the second
    ``CDATA[...]`` payload in it is returned.

    Raises:
        IndexError: presumably, when fewer than two CDATA payloads are present
            (assumes ``matchall`` returns a list -- confirm).
    """
    location = get_location(url)
    if location != url:
        return location
    html = get_content(url)
    # The wanted URL is the second CDATA payload (index 1).
    return matchall(html, [r'CDATA\[([^\]]+)'])[1]

Example #10

0

Show file

    def prepare(self):
        """Extract the ``vu``/``uu`` pair from the URL, then start the download.

        The extraction only runs when a URL is present and ``self.vid`` is
        still unset.  The result of ``self.letvcloud_download_by_vu()`` is
        returned either way.
        """
        needs_lookup = bool(self.url) and not self.vid
        if needs_lookup:
            # maybe error!! (warning kept from the original author)
            url_params = matchall(self.url, ["vu=([^&]+)", "uu=([^&]+)"])
            # Store the raw result first; the unpacking afterwards only checks
            # that exactly two values were found.
            self.vid = url_params
            vu, uu = url_params

        return self.letvcloud_download_by_vu()

Example #11

0

Show file

File: le.py Project: wiseideal/ykdl

    def download_playlist(self, url, param):
        """Download every video referenced by the playlist page at *url*.

        Args:
            url: Playlist page address; stored in ``self.url``.
            param: Stored in ``self.param`` and passed through unchanged to
                each ``self.download`` call.
        """
        self.url = url
        self.param = param
        html = get_content(self.url)

        # Every vid="<digits>" attribute on the page names one video.
        vids = matchall(html, [r'vid="(\d+)"'])
        for v in vids:
            self.download(v, param)

Example #12

0

Show file

File: le.py Project: Yu1984/ykdl

    def download_playlist(self, url, param):
        """Fetch the playlist page at *url* and download each listed video.

        Args:
            url: Address of the playlist page; kept in ``self.url``.
            param: Kept in ``self.param`` and forwarded as-is to
                ``self.download`` for every video id found.
        """
        self.url = url
        self.param = param
        html = get_content(self.url)

        # One download per vid="<digits>" attribute found in the page.
        vids = matchall(html, [r'vid="(\d+)"'])
        for v in vids:
            self.download(v, param)

Example #13

0

Show file

File: video.py Project: pmphxs/ykdl

 def prepare_list(self):
     """Return ``index_<n>.html`` URLs for every page of the video.

     When ``self.url`` carries an ``aid=`` parameter it is first rewritten to
     the ``/video/av<id>`` form.  The page is then downloaded and each
     ``"page":<n>,`` occurrence becomes one URL.  Returns ``None`` when no
     page entry is found.
     """
     av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
     if "aid=" in self.url:
         self.url = 'https://www.bilibili.com/video/av' + av_id
     html = get_content(self.url)
     video_list = matchall(html, [r'"page":(\d+),'])
     if video_list:
         return ['https://www.bilibili.com/av{}/index_{}.html'.format(av_id, p) for p in video_list]

Example #14

0

Show file

File: bangumi.py Project: wanmeiw/ykdl

 def prepare_list(self):
     """Return the bangumi episode URLs found on the page at ``self.url``.

     Every ``,"ep_id":<n>,`` occurrence is collected; the first one is
     discarded before the ``/bangumi/play/ep<n>`` URLs are built.  Returns
     ``None`` when the page contains no ``ep_id`` at all.
     """
     html = get_content(self.url)
     video_list = matchall(html, [r',"ep_id":(\d+),'])
     if video_list:
         # The first hit is dropped -- presumably it repeats the episode
         # currently being viewed; confirm against a sample page.
         del video_list[0]
         return [
             'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
             for eid in video_list
         ]

Example #15

0

Show file

File: m3g.py Project: wwqgtxx/ykdl

 def get_url(self):
     """Collect the distinct video URLs matched in ``self.html``.

     Does nothing when ``self.url_patterns`` is empty.  Otherwise the matches
     are stored, in first-seen order and without duplicates, in
     ``self.v_url``; URLs starting with ``//`` get an ``http:`` prefix.
     """
     if not self.url_patterns:
         return

     unique_urls = []
     for found in matchall(self.html, self.url_patterns):
         # Scheme-relative address: make the scheme explicit.
         candidate = 'http:' + found if found[:2] == '//' else found
         if candidate in unique_urls:
             continue
         unique_urls.append(candidate)
     self.v_url = unique_urls

Example #16

0

Show file

File: video.py Project: yuyaokeng/ykdl

 def prepare_list(self):
     """Return one ``?p=<n>`` URL per page of the video at ``self.url``.

     The video id is read from the URL as ``av<digits>`` or a ten-character
     ``BV``/``bv`` id; ``av`` ids are converted with ``av2bv``.  Returns
     ``None`` when the downloaded page has no ``"page":<n>,`` entry.
     """
     # backup https://api.bilibili.com/x/player/pagelist?bvid=
     vid = match1(self.url, r'/(av\d+|(?:BV|bv)[0-9A-Za-z]{10})')
     if vid[:2] == 'av':
         vid = av2bv(vid)
     html = get_content(self.url)
     video_list = matchall(html, [r'"page":(\d+),'])
     if video_list:
         return ['https://www.bilibili.com/video/{}?p={}'.format(vid, p) for p in video_list]

Example #17

0

Show file

 def prepare_list(self):
     """Return the episode URLs listed in the page's ``"epList"`` array.

     The first ``"epList":[...]`` span of the downloaded page is isolated and
     every ``,"id":<n>,`` inside it becomes a ``/bangumi/play/ep<n>`` URL.
     Returns ``None`` when the page has no ``epList``.
     """
     html = get_content(self.url)
     eplist = match1(html, r'"epList":(\[.+?\])')
     if eplist:
         eplist = matchall(eplist, [r',"id":(\d+),'])
         return [
             'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
             for eid in eplist
         ]

Example #18

0

Show file

File: m3g.py Project: netlovehf/ykdl

 def get_url(self):
     """Store the de-duplicated URL matches from ``self.html`` in ``self.v_url``.

     Returns without touching ``self.v_url`` when ``self.url_patterns`` is
     empty.  The patterns are passed to ``matchall`` as separate arguments.
     Matches beginning with ``//`` are prefixed with ``http:``; order of first
     appearance is kept.
     """
     if not self.url_patterns:
         return

     collected = []
     for hit in matchall(self.html, *self.url_patterns):
         if hit[:2] == '//':
             # Scheme-relative address: prepend the scheme.
             hit = 'http:' + hit
         if hit in collected:
             continue
         collected.append(hit)
     self.v_url = collected

Example #19

0

Show file

File: video.py Project: togitss/ykdl

 def prepare_list(self):
     """Return the episode/part URLs for the page at ``self.url``.

     If the page has ``<option value='...'>`` entries, each value is appended
     to ``http://www.bilibili.com``.  Otherwise the ``season_id`` is read from
     the page and the season-info endpoint is queried; its episodes'
     ``webplay_url`` values are returned sorted by their ``index`` field.
     """
     html = get_content(self.url)
     video_list = matchall(html, ["<option value='([^']*)'"])
     if video_list:
         return ['http://www.bilibili.com'+v for v in video_list]
     sid = match1(html, r'var season_id = "(\d+)";')
     j_ = get_content("http://bangumi.bilibili.com/jsonp/seasoninfo/{}.ver".format(sid))
     s_data = json.loads(j_)
     urls = [e['webplay_url'] for e in sorted(s_data['result']['episodes'], key=lambda e: e['index'])]
     return urls

Example #20

0

Show file

File: bangumi.py Project: yisuiyue123/ykdl

 def prepare_list(self):
     """Return the episode URLs from every ``"epList"`` array on the page.

     Each ``"epList":[...]`` span of the downloaded page is searched for
     ``,"id":<n>,`` entries; the ids are concatenated in page order and turned
     into ``/bangumi/play/ep<n>`` URLs.  Returns ``None`` when the page has no
     ``epList``.
     """
     html = get_content(self.url)
     eplist = matchall(html, [r'"epList":(\[.*?\])'])
     if eplist:
         id_patterns = [r',"id":(\d+),']
         # Flatten the per-array matches in one pass instead of repeatedly
         # concatenating lists with sum().
         eplist = [
             eid
             for chunk in eplist
             for eid in matchall(chunk, id_patterns)
         ]
         return [
             'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
             for eid in eplist
         ]

Example #21

0

Show file

File: openc.py Project: flfq/ykdl

    def prepare(self):
        """Build a ``VideoInfo`` with a single ``current`` stream.

        When ``self.url`` is set, the video id is read from the page's
        ``playVideo("<digits>`` call.  The id is passed to ``video_info_xml``
        and the title, segment URLs and file sizes are read from its result.

        Returns:
            The populated ``VideoInfo``; ``size`` is the sum of all
            ``<filesize>`` values.
        """
        info = VideoInfo(self.name)
        if self.url:
            html = get_content(self.url)
            self.vid = match1(html, r'playVideo\("(\d+)')

        self.logger.debug("VID: {}".format(self.vid))

        xml = video_info_xml(self.vid)

        info.title = match1(xml, r'<vname><!\[CDATA\[([^\]]+)')
        urls = matchall(xml, [r'<url><!\[CDATA\[([^\]]+)'])
        sizes = matchall(xml, ['<filesize>([^<]+)'])
        size = sum(int(s) for s in sizes)

        info.stream_types.append('current')
        info.streams['current'] = {'container': 'hlv', 'video_profile': 'current', 'src': urls, 'size' : size}
        return info

Example #22

0

Show file

File: live.py Project: Yu1984/ykdl

    def prepare(self):
        """Register a single ``current`` flv stream for a live room.

        Marks the extractor as live.  When ``self.vid`` is unset, the page is
        downloaded to read the ``cid`` and to split the ``<title>`` on ``-``
        into ``self.title`` and ``self.artist``.  The play-URL endpoint is then
        queried and the second ``CDATA[...]`` payload is used as the stream
        URL.  Nothing is returned; results are stored on ``self``.
        """
        self.live = True
        if not self.vid:
            html = get_content(self.url)
            self.vid = match1(html, 'cid=([^&]+)')
            t = match1(html, '<title>([^<]+)').split('-')
            self.title = t[0]
            self.artist = t[1]

        info = get_content('http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
        # Index 1: the second CDATA payload is the one that gets used.
        urls = [matchall(info, [r'CDATA\[([^\]]+)'])[1]]
        # The stream size is recorded as infinite.
        size = float('inf')
        ext = 'flv'

        self.stream_types.append('current')
        self.streams['current'] = {'container': ext, 'video_profile': 'current', 'src' : urls, 'size': size}

Example #23

0

Show file

File: live.py Project: PureTV/ykdl

    def prepare(self):
        """Build a ``VideoInfo`` with a single ``current`` flv stream.

        When ``self.vid`` is unset, the page is downloaded to read ``ROOMID``
        and to split the ``<title>`` on ``-`` into title and artist.  The
        play-URL endpoint is then queried and the second ``CDATA[...]``
        payload is used as the stream URL.

        Returns:
            The populated ``VideoInfo``.
        """
        info = VideoInfo(self.name, True)
        if not self.vid:
            html = get_content(self.url)
            self.vid = match1(html, r'var ROOMID = (\d+);')
            t = match1(html, '<title>([^<]+)').split('-')
            info.title = t[0]
            info.artist = t[1]

        data = get_content('http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
        # Index 1: the second CDATA payload is the one that gets used.
        urls = [matchall(data, [r'CDATA\[([^\]]+)'])[1]]
        # The stream size is recorded as infinite.
        size = float('inf')
        ext = 'flv'

        info.stream_types.append('current')
        info.streams['current'] = {'container': ext, 'video_profile': 'current', 'src' : urls, 'size': size}
        return info

Example #24

0

Show file

File: joy.py Project: PureTV/ykdl

    def prepare(self):
        """Build a ``VideoInfo`` whose ``current`` stream is the page's fourth source.

        A missing ``self.vid`` is read from the ``resourceId`` parameter of the
        URL; a missing ``self.url`` is built from ``self.vid``.  The page is
        downloaded, its first ``<meta content="...">`` becomes the title, and
        the fourth ``<source src="...">`` (index 3) is probed with ``url_info``
        for container and size.

        Returns:
            The populated ``VideoInfo``.
        """
        info = VideoInfo(self.name)

        if not self.vid:
            self.vid = match1(self.url, 'resourceId=([0-9]+)')
        if not self.url:
            self.url = "http://www.joy.cn/video?resourceId={}".format(self.vid)

        page = get_content(self.url)
        info.title = match1(page, '<meta content="([^"]+)')

        sources = matchall(page, ['<source src="([^"]+)'])
        media_url = sources[3]
        _unused, container, size = url_info(media_url)

        info.stream_types.append('current')
        info.streams['current'] = dict(container=container,
                                       src=[media_url],
                                       size=size)
        return info

Example #25

0

Show file

File: iqiyi.py Project: togitss/ykdl

    def prepare(self):
        """Resolve ``(tvid, vid)`` and build a ``VideoInfo`` from the VPS data.

        While ``self.vid`` is unset, a ``curid=<tvid>_<vid>`` pair in
        ``self.url`` is tried first; failing that, the page is downloaded and
        searched for ``tvid`` / ``vid`` values.

        Returns:
            The populated ``VideoInfo`` with its stream types ordered by
            ``self.ids``.

        Raises:
            AssertionError: when the VPS response code is not ``'A00000'``.
        """
        info = VideoInfo(self.name)
        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]
                info.title = self.name + '_' + str(self.vid)

        if self.url and not self.vid:
            html = get_content(self.url)
            tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)',
                          'tvId:([^,]+)')
            videoid = match1(html, 'data-player-videoid="([^"]+)"',
                             'vid=([^&]+)', 'vid:"([^"]+)')
            self.vid = (tvid, videoid)
            info.title = match1(html, '<title>([^<]+)').split('-')[0]

        tvid, vid = self.vid
        vps_data = getvps(tvid, vid)
        assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
        url_prefix = vps_data['data']['vp']['du']
        stream = vps_data['data']['vp']['tkl'][0]
        vs_array = stream['vs']
        for vs in vs_array:
            bid = vs['bid']
            fs_array = vs['fs']
            real_urls = []
            # Each segment needs one extra request to obtain its download URL.
            for seg_info in fs_array:
                url = url_prefix + seg_info['l']
                json_data = json.loads(get_content(url))
                down_url = json_data['l']
                real_urls.append(down_url)
            stream = self.vd_2_id[bid]
            info.stream_types.append(stream)
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': 'flv',
                'src': real_urls,
                'size': 0
            }
        info.stream_types = sorted(info.stream_types, key=self.ids.index)
        return info

Example #26

0

Show file

File: kankanews.py Project: wwqgtxx/ykdl

 def prepare(self):
     """Build a ``VideoInfo`` with a single ``current`` mp4 stream.

     The ``<omsid>`` comes from the vxml descriptor named by the ``a/<id>.``
     part of ``self.url``.  If that gives ``'0'`` or nothing, the page is
     downloaded and its ``xmlid=`` or ``embed/`` reference names a second
     descriptor to try.

     Returns:
         The populated ``VideoInfo``.

     Raises:
         AssertionError: when no usable ``<omsid>`` could be found.
     """
     info = VideoInfo(self.name)
     id1 = match1(self.url, r'a/([^\.]+)\.')
     api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
     video_data1 = get_content(api1)
     self.vid = match1(video_data1, '<omsid>([^<]+)<')
     if self.vid == '0' or not self.vid:
         html = get_content(self.url)
         # Only the embed/ form has its underscores replaced by slashes.
         id1 = match1(html, 'xmlid=([^"]+)') or match1(html, 'embed/([^"]+)').replace('_', '/')
         api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
         video_data1 = get_content(api1)
         self.vid = match1(video_data1, '<omsid>([^<]+)<')
     assert self.vid != '0' and self.vid, self.url + ': Not a video news link!'
     api2 = 'http://vapi.kankanews.com/index.php?app=api&mod=public&act=getvideo&id={}'.format(self.vid)
     video_data2 = get_content(api2)
     urls = matchall(video_data2, [r'<videourl><!\[CDATA\[([^\]]+)'])
     info.title = match1(video_data2, r'<otitle><!\[CDATA\[([^\]]+)')
     info.stream_types.append('current')
     info.streams['current'] = {'container': 'mp4', 'video_profile': 'current', 'src' : urls, 'size': 0}
     return info

Example #27

0

Show file

    def prepare(self):
        """Build a ``VideoInfo`` with a single ``current`` mp4 stream.

        The numeric id is read from the page's ``webcfg = {"id": <n>``
        assignment and used to fetch the web-play XML.  Host, key, rid, title,
        server time and the segment list come from that XML; one URL is built
        for each segment number from 0 up to the highest number listed.

        Returns:
            The populated ``VideoInfo``; ``size`` is the sum of the segments'
            ``fs`` values.
        """
        info = VideoInfo(self.name)
        html = get_content(self.url)
        self.vid = match1(html, r'webcfg\s*=\s*{"id":\s*(\d+)')
        xml = get_content(
            'http://web-play.pptv.com/webplay3-0-{}.xml?type=web.fpp'.format(
                self.vid))

        host = match1(xml, '<sh>([^<>]+)</sh>')
        k = match1(xml, '<key expire=[^<>]+>([^<>]+)</key>')
        rid = match1(xml, 'rid="([^"]+)"')
        info.title = match1(xml, 'nm="([^"]+)"')

        # The last four characters are cut off before parsing -- presumably a
        # timezone suffix that time.strptime's default format rejects; confirm.
        st = match1(xml, '<st>([^<>]+)</st>')[:-4]
        # Server time in ms minus one minute.  NOTE(review): the local clock
        # is subtracted here and added back on the next line, so the two
        # time.time() terms very nearly cancel.
        st = time.mktime(
            time.strptime(st)) * 1000 - 60 * 1000 - time.time() * 1000
        st += time.time() * 1000
        st = st / 1000
        key = constructKey(st)

        pieces = matchall(xml, [r'<sgm no="(\d+)"[^<>]+fs="(\d+)"'])

        # Split the (number, size) pairs into two parallel tuples.
        numbers, fs = zip(*pieces)
        urls = [
            "http://{}/{}/{}?key={}&fpp.ver=1.3.0.4&k={}&type=web.fpp".format(
                host, i, rid, key, k)
            for i in range(max(map(int, numbers)) + 1)
        ]

        total_size = sum(map(int, fs))

        info.stream_types.append('current')
        info.streams['current'] = {
            'container': 'mp4',
            'video_profile': 'current',
            'src': urls,
            'size': total_size
        }

        return info

Example #28

0

Show file

File: live.py Project: wiseideal/ykdl

    def prepare(self):
        """Register a single ``current`` flv stream for a live room.

        Marks the extractor as live.  When ``self.vid`` is unset, the page is
        downloaded to read the ``cid`` and to split the ``<title>`` on ``-``
        into ``self.title`` and ``self.artist``.  The play-URL endpoint is then
        queried and its second ``CDATA[...]`` payload becomes the stream URL.
        Nothing is returned; results are stored on ``self``.
        """
        self.live = True
        if not self.vid:
            html = get_content(self.url)
            self.vid = match1(html, 'cid=([^&]+)')
            t = match1(html, '<title>([^<]+)').split('-')
            self.title = t[0]
            self.artist = t[1]

        info = get_content(
            'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
        # Index 1: the second CDATA payload is the one that gets used.
        urls = [matchall(info, [r'CDATA\[([^\]]+)'])[1]]
        # The stream size is recorded as infinite.
        size = float('inf')
        ext = 'flv'

        self.stream_types.append('current')
        self.streams['current'] = {
            'container': ext,
            'video_profile': 'current',
            'src': urls,
            'size': size
        }

Example #29

0

Show file

File: joy.py Project: netlovehf/ykdl

    def prepare(self):
        """Return a ``VideoInfo`` whose ``current`` stream is the page's fourth source.

        ``self.vid`` defaults to the URL's ``resourceId`` parameter and
        ``self.url`` defaults to the joy.cn video address built from
        ``self.vid``.  The title is the page's first ``<meta content="...">``
        value; the stream URL is the fourth ``<source src="...">`` match
        (index 3), probed with ``url_info`` for container and size.
        """
        info = VideoInfo(self.name)

        if not self.vid:
            self.vid = match1(self.url, 'resourceId=([0-9]+)')
        if not self.url:
            self.url = "http://www.joy.cn/video?resourceId={}".format(self.vid)

        document = get_content(self.url)
        info.title = match1(document, '<meta content="([^"]+)')

        source_urls = matchall(document, '<source src="([^"]+)')
        picked = source_urls[3]
        _unused, ext, size = url_info(picked)

        current_stream = {}
        current_stream['container'] = ext
        current_stream['src'] = [picked]
        current_stream['size'] = size

        info.stream_types.append('current')
        info.streams['current'] = current_stream
        return info

Example #30

0

Show file

    def prepare(self):
        """Build a ``VideoInfo`` with a single ``current`` flv stream.

        When ``self.vid`` is unset, the page is downloaded to read ``ROOMID``
        and to split the ``<title>`` on ``-`` into title and artist.  The
        play-URL endpoint is then queried and its second ``CDATA[...]``
        payload becomes the stream URL.

        Returns:
            The populated ``VideoInfo``.
        """
        info = VideoInfo(self.name, True)
        if not self.vid:
            html = get_content(self.url)
            self.vid = match1(html, r'var ROOMID = (\d+);')
            t = match1(html, '<title>([^<]+)').split('-')
            info.title = t[0]
            info.artist = t[1]

        data = get_content(
            'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
        # Index 1: the second CDATA payload is the one that gets used.
        urls = [matchall(data, [r'CDATA\[([^\]]+)'])[1]]
        # The stream size is recorded as infinite.
        size = float('inf')
        ext = 'flv'

        info.stream_types.append('current')
        info.streams['current'] = {
            'container': ext,
            'video_profile': 'current',
            'src': urls,
            'size': size
        }
        return info

Example #31

0

Show file

File: video.py Project: wwqgtxx/ykdl

    def get_vid_title(self):
        """Resolve the cid and title of the page addressed by ``self.url``.

        ``self.url`` is first normalised to
        ``https://www.bilibili.com/av<id>/`` (plus ``?p=<page>`` for pages
        other than the first).  When ``self.vid`` is not set yet, the page is
        downloaded and its ``cid``/``page``/``part`` entries are scanned for
        the requested page.

        Returns:
            A ``(vid, title)`` tuple.  ``vid`` falls back to the current
            ``self.vid`` when no entry matches; ``title`` is ``None`` when the
            page was not downloaded because ``self.vid`` was already set.
        """
        av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
        page_index = '1'
        if "#page=" in self.url or "?p=" in self.url or 'index_' in self.url:
            page_index = match1(self.url, r'(?:#page|\?p)=(\d+)', r'index_(\d+)\.')
        if page_index == '1':
            self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
        else:
            self.url = 'https://www.bilibili.com/av{}/?p={}'.format(av_id, page_index)

        # Bind both results up front so the return below cannot hit an
        # UnboundLocalError when the lookup is skipped or finds nothing.
        vid = self.vid
        title = None
        if not self.vid:
            html = get_content(self.url)
            title = match1(html, '"title":"([^"]+)', '<h1 title="([^"]+)', '<title>([^<]+)').strip()
            video_list = matchall(html, [r'"cid":(\d+),"page":(\d+),"from":"[^"]+","part":"([^"]*)",'])
            for cid, page, part in video_list:
                if page == page_index:
                    vid = cid
                    # Multi-part videos get "<title> - <page> - <part>".
                    if len(video_list) > 1:
                        title = u'{} - {} - {}'.format(title, page, part)
                    elif part:
                        title = u'{} - {}'.format(title, part)
                    break

        return vid, title

Example #32

0

Show file

File: iqiyi.py Project: PureTV/ykdl

    def prepare(self):
        """Resolve ``(tvid, vid)`` and build a ``VideoInfo`` from the VPS data.

        While ``self.vid`` is unset, a ``curid=<tvid>_<vid>`` pair in
        ``self.url`` is tried first; otherwise the page is downloaded and
        searched for ``tvid`` / ``vid`` values.

        Returns:
            The populated ``VideoInfo`` with its stream types ordered by
            ``self.ids``.

        Raises:
            AssertionError: when the VPS response code is not ``'A00000'``.
        """
        info = VideoInfo(self.name)
        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]
                info.title = self.name + '_' + str(self.vid)

        if self.url and not self.vid:
            html = get_content(self.url)
            tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)' , 'tvId:([^,]+)')
            videoid = match1(html, 'data-player-videoid="([^"]+)"', 'vid=([^&]+)', 'vid:"([^"]+)')
            self.vid = (tvid, videoid)
            info.title = match1(html, '<title>([^<]+)').split('-')[0]

        tvid, vid = self.vid
        vps_data = getvps(tvid, vid)
        assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
        url_prefix = vps_data['data']['vp']['du']
        stream = vps_data['data']['vp']['tkl'][0]
        vs_array = stream['vs']
        for vs in vs_array:
            bid = vs['bid']
            fs_array = vs['fs']
            real_urls = []
            # Each segment needs one extra request to obtain its download URL.
            for seg_info in fs_array:
                url = url_prefix + seg_info['l']
                json_data=json.loads(get_content(url))
                down_url = json_data['l']
                real_urls.append(down_url)
            stream = self.vd_2_id[bid]
            info.stream_types.append(stream)
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {'video_profile': stream_profile, 'container': 'flv', 'src': real_urls, 'size' : 0}
        info.stream_types = sorted(info.stream_types, key = self.ids.index)
        return info

Example #33

0

Show file

File: mgtv.py Project: wwqgtxx/ykdl

    def prepare_list(self):
        """Return the ``href`` of every ``"a-pic-play"`` link on the page.

        The page at ``self.url`` is requested with an empty ``headers`` dict.
        """
        link_patterns = ['"a-pic-play" href="([^"]+)"']
        page = get_content(self.url, headers={})
        return matchall(page, link_patterns)

Example #34

0

Show file

 def prepare_list(self):
     """Return every ``id="..."`` value that is followed by a ``title`` attribute.

     The pattern requires exactly one non-whitespace character (``\\S``)
     between the captured id and the `` title`` text that follows.
     """
     html = get_content(self.url)
     return matchall(html, [r'id="([^"]+)\S title'])

Example #35

0

Show file

 def prepare_list(self):
     """Return every ``video_id: <value>`` found on the page at ``self.url``.

     Each value runs up to, but not including, the next comma.
     """
     id_patterns = ['video_id: ([^,]+)']
     page = get_content(self.url)
     return matchall(page, id_patterns)

Example #36

0

Show file

    def prepare_list(self):
        """Collect the link targets of all ``"a-pic-play"`` anchors.

        ``self.url`` is fetched with an explicitly empty ``headers`` dict and
        the matched ``href`` values are returned as produced by ``matchall``.
        """
        document = get_content(self.url, headers={})
        anchors = matchall(document, ['"a-pic-play" href="([^"]+)"'])
        return anchors

Example #37

0

Show file

File: live.py Project: wwqgtxx/ykdl

 def prepare_list(self):
     """Return one placeholder URL per ``vid="<digits>"`` found on the page.

     The URLs are not real pages (the original author calls them fake); they
     only carry the video id in a ``vid=<id>`` suffix.
     """
     html = get_content(self.url)
     vids = matchall(html, [r'vid="(\d+)"'])
     # fake urls
     return ['http://live.le.com/izt/vid={}'.format(vid) for vid in vids]

Example #38

0

Show file

File: iqiyi.py Project: liwenDeng/ykdl

    def prepare_list(self):
        """Return the ``data-tvid`` / ``data-vid`` pairs found on the page.

        The single pattern has two capture groups, one per attribute; the
        attributes must appear adjacent and in that order.
        """
        pair_patterns = ['data-tvid="([^"]+)" data-vid="([^"]+)"']
        page = get_content(self.url)
        return matchall(page, pair_patterns)

Example #39

0

Show file

File: le.py Project: flfq/ykdl

    def prepare_list(self):
        """Return every ``vid="<digits>"`` value found on the page at ``self.url``."""

        html = get_content(self.url)

        return matchall(html, [r'vid="(\d+)"'])

Example #40

0

Show file

File: live.py Project: flfq/ykdl

    def prepare_list(self):
        """Return the matches of ``douyu_match_pattern`` in the page at ``self.url``."""
        page = get_content(self.url)
        matches = matchall(page, douyu_match_pattern)
        return matches

Example #41

0

Show file

File: video.py Project: zhangn1985/ykdl

    def prepare(self):
        """Resolve ``(tvid, vid)`` and collect the available streams.

        Id resolution, while ``self.vid`` is unset:

        1. a ``curid=<tvid>_<vid>`` pair in ``self.url``; the title is then
           read from the player-video-info API, and ``self.vid`` is reset to
           ``None`` if that request or its parsing fails,
        2. the downloaded page: its ``:video-info='...'`` JSON, else assorted
           ``tvId`` / ``vid`` patterns, following one embedded
           ``www.iqiyi.com/v_*.html`` link when neither id is found.

        Streams are then gathered from three sources, each one only tried
        when the previous one raised: ``gettmts`` (m3u8), ``getvps``
        (segmented flv), and ``getdash``.

        Returns:
            The populated ``VideoInfo`` with its stream types ordered by
            ``self.ids``.

        Raises:
            AssertionError: when either id is missing, or when the last
                fallback (dash) reports a code other than ``'A00000'``.
        """
        info = VideoInfo(self.name)

        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]
                info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + self.vid[0]
                try:
                    info_json = json.loads(get_content(info_u))
                    info.title = info_json['data']['vn']
                except Exception:
                    # Any failure here discards the ids so the page-based
                    # lookup below runs instead.
                    self.vid = None

        def get_vid():
            """Set ``self.vid`` and ``info.title`` from the page at ``self.url``."""
            html = get_content(self.url)
            video_info = match1(html, ":video-info='(.+?)'")

            if video_info:
                video_info = json.loads(video_info)
                self.vid = str(video_info['tvId']), str(video_info['vid'])
                info.title = video_info['name']

            else:
                tvid = match1(html,
                              r'tvId:\s*"([^"]+)',
                              'data-video-tvId="([^"]+)',
                              r'''\['tvid'\]\s*=\s*"([^"]+)''',
                              r'"tvId":\s*([^,]+)')
                videoid = match1(html,
                                'data-video-vid="([^"]+)',
                                r'vid:\s*"([^"]+)',
                                r'''\['vid'\]\s*=\s*"([^"]+)''',
                                r'"vid":\s*([^,]+)')
                if not (tvid and videoid):
                    # The page may only link to the real video page; follow it.
                    url = match1(html, r'(www\.iqiyi\.com/v_\w+\.html)')
                    if url:
                        self.url = 'https://' + url
                        return get_vid()
                self.vid = (tvid, videoid)
                info.title = match1(html, '<title>([^<]+)').split('-')[0]

        if self.url and not self.vid:
            get_vid()
        tvid, vid = self.vid
        assert tvid and vid, 'can\'t play this video!!'

        def push_stream_vd(vs):
            """Register the m3u8 stream described by the tmts entry *vs*."""
            vd = vs['vd']
            stream = self.vd_2_id[vd]
            if stream not in info.streams:
                info.stream_types.append(stream)
            elif int(vd) < 10:
                # An already registered stream is only replaced by entries
                # whose vd is 10 or higher.
                return
            m3u8 = vs['m3utx']
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': 'm3u8',
                'src': [m3u8],
                'size': 0
            }

        def push_stream_bid(bid, container, fs_array, size):
            """Register a segmented stream unless its id is already present.

            Reads ``url_prefix`` from the enclosing scope; it is bound by the
            vps/dash fallbacks below before this helper is called.
            """
            stream = self.vd_2_id[bid]
            if stream in info.streams:
                return
            real_urls = []
            # Each segment needs one extra request to obtain its download URL.
            for seg_info in fs_array:
                url = url_prefix + seg_info['l']
                json_data = json.loads(get_content(url))
                down_url = json_data['l']
                real_urls.append(down_url)
            info.stream_types.append(stream)
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': container,
                'src': real_urls,
                'size': size
            }

        try:
            # try use tmts first
            # less http requests, get results quickly
            tmts_data = gettmts(tvid, vid)
            self.logger.debug('tmts_data:\n' + str(tmts_data))
            assert tmts_data['code'] == 'A00000', 'can\'t play this video!!'
            vs_array = tmts_data['data']['vidl']
            for vs in vs_array:
                push_stream_vd(vs)
            vip_conf = tmts_data['data'].get('ctl', {}).get('configs')
            if vip_conf:
                # Within each pair, the first vd that resolves wins.
                for vds in (('10', '19'), ('18', '5')):
                    for vd in vds:
                        if vd in vip_conf:
                            tmts_data = gettmts(tvid, vip_conf[vd]['vid'])
                            if tmts_data['code'] == 'A00000':
                                push_stream_vd(tmts_data['data'])
                                break

        except Exception:
            try:
                # use vps as preferred fallback
                vps_data = getvps(tvid, vid)
                self.logger.debug('vps_data:\n' + str(vps_data))
                assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
                url_prefix = vps_data['data']['vp']['du']
                vs_array = vps_data['data']['vp']['tkl'][0]['vs']
                for vs in vs_array:
                    bid = vs['bid']
                    fs_array = vs['fs']
                    size = vs['vsize']
                    push_stream_bid(bid, 'flv', fs_array, size)

            except Exception:
                # use dash as fallback
                for bid in (500, 300, 200, 100):
                    dash_data = getdash(tvid, vid, bid)
                    self.logger.debug('dash_data:\n' + str(dash_data))
                    assert dash_data['code'] == 'A00000', 'can\'t play this video!!'
                    url_prefix = dash_data['data']['dd']
                    streams = dash_data['data']['program']['video']
                    for stream in streams:
                        if 'fs' in stream:
                            _bid = stream['bid']
                            container = stream['ff']
                            fs_array = stream['fs']
                            size = stream['vsize']
                            break
                    # NOTE(review): if no entry carries 'fs', the values from
                    # the previous bid are reused (or are unbound on the
                    # first pass) -- confirm this is intended.
                    push_stream_bid(_bid, container, fs_array, size)

        info.stream_types = sorted(info.stream_types, key=self.ids.index)
        return info

Example #42

0

Show file

    def prepare_list(self):
        """Scan the page at ``self.url`` for ``data-tvid``/``data-vid`` pairs.

        Returns:
            Whatever ``matchall`` yields for the two-group attribute pattern
            applied to the downloaded page.
        """
        page = get_content(self.url)
        id_patterns = ['data-tvid=\"([^\"]+)\" data-vid=\"([^\"]+)\"']
        return matchall(page, id_patterns)

Example #43

0

Show file

File: iqiyi.py Project: liwenDeng/ykdl

    def prepare(self):
        """Resolve the video ids and collect the playable streams.

        ``self.vid`` is unpacked as a ``(tvid, vid)`` pair.  When it is not set
        yet it is taken from a ``curid=<tvid>_<vid>`` fragment of ``self.url``
        or, failing that, scraped from the downloaded page.

        Streams come from ``getVMS``; when that call does not answer with code
        ``A00000`` the per-bid ``geth5VMS`` endpoint is queried instead.

        Returns:
            The ``VideoInfo`` with ``stream_types`` ordered by ``self.ids``
            (the H5 fallback path returns it unsorted, as before).
        """
        info = VideoInfo(self.name)
        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]

        if self.url and not self.vid:
            html = get_content(self.url)
            tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)' , 'tvId:([^,]+)')
            videoid = match1(html, 'data-player-videoid="([^"]+)"', 'vid=([^&]+)', 'vid:"([^"]+)')
            self.vid = (tvid, videoid)
            # A page without <title> must not abort the extraction.
            title = match1(html, '<title>([^<]+)')
            if title:
                info.title = title.split('-')[0]

        tvid, vid = self.vid
        data = getVMS(tvid, vid)
        if data['code'] != 'A00000':
            # VMS refused: probe the H5 endpoint once per known bid.
            for bid in self.id_h5:
                h5_data = geth5VMS(tvid, vid, bid)
                if h5_data["code"] == "A00000":
                    stream = self.vd_2_id[bid]
                    profile = self.id_2_profile[stream]
                    info.title = h5_data['data']['playInfo']['vn']
                    info.stream_types.append(stream)
                    info.streams[stream] = {'container': 'mp4', 'video_profile': profile, 'src' : [h5_data['data']['m3u']], 'size' : 0}
            return info

        for stream in data['data']['vidl']:
            try:
                stream_id = self.vd_2_id[stream['vd']]
                if stream_id in info.stream_types or stream_id in self.id_ignore:
                    continue
                stream_profile = self.id_2_profile[stream_id]
                info.stream_types.append(stream_id)
                info.streams[stream_id] = {'video_profile': stream_profile, 'container': 'm3u8', 'src': [stream['m3u']], 'size' : 0}
            except Exception:
                # Best effort: skip entries we cannot map, but let
                # KeyboardInterrupt / SystemExit propagate.
                log.i("vd: {} is not handled".format(stream['vd']))
                log.i("info is {}".format(stream))

        # VIP-only qualities are described separately under data.ctl; when
        # that section is missing there is nothing more to add.
        try:
            vip_vds = data['data']['ctl']['vip']['bids']
            vip_conf = data['data']['ctl']['configs']
        except Exception:
            info.stream_types = sorted(info.stream_types, key = self.ids.index)
            return info

        # NOTE(review): bids are tested as ints in vip_vds but looked up as
        # strings in vip_conf, exactly as the original code did - confirm
        # against the API payload.
        for bid, stream_id, profile in ((5, 'BD', '1080p'), (10, '4k', '4k')):
            if stream_id in info.stream_types or bid not in vip_vds:
                continue
            vip_info = getVMS(tvid, vip_conf[str(bid)]['vid'])
            if vip_info['code'] == 'A00000':
                vip_url = vip_info['data']['m3u']
                info.stream_types.append(stream_id)
                info.streams[stream_id] = {'video_profile': profile, 'container': 'm3u8', 'src': [vip_url], 'size' : 0}

        info.stream_types = sorted(info.stream_types, key = self.ids.index)
        return info

Example #44

0

Show file

File: video.py Project: liwenDeng/ykdl

 def prepare_list(self):
     """Return the ``matchall`` hits for ``id="..."`` attributes followed by ``title``.

     The page at ``self.url`` is downloaded with ``get_content`` first.
     """
     html = get_content(self.url)
     # Raw string: same pattern text, without the invalid ``\S`` escape
     # that a plain literal triggers a warning for.
     return matchall(html, [r'id="([^"]+)\S title'])

Example #45

0

Show file

File: miaopai.py Project: wwqgtxx/ykdl

 def prepare_list(self):
     """Return the quoted entries of the page's ``video_list=[...]`` array.

     Returns:
         The ``matchall`` hits inside the array, or an empty list when the
         page at ``self.url`` carries no ``video_list``.
     """
     html = get_content(self.url)
     video_list = match1(html, r'video_list=\[([^\]]+)')
     if not video_list:
         # Nothing to scan; avoid handing a non-string to matchall.
         return []
     return matchall(video_list, [r'"([^",]+)'])

Example #46

0

Show file

File: letvcloud.py Project: PureTV/ykdl

    def prepare(self):
        """Fill ``self.vid`` from the URL when needed, then delegate.

        Returns:
            The result of ``self.letvcloud_download_by_vu()``.
        """
        needs_vid = self.url and not self.vid
        if needs_vid:
            # maybe error!!
            found = matchall(self.url, ["vu=([^&]+)","uu=([^&]+)"])
            self.vid = found
            # Unpacking fails unless exactly two values were matched.
            vu, uu = found
        return self.letvcloud_download_by_vu()

Example #47

0

Show file

File: live.py Project: wwqgtxx/ykdl

    def prepare_list(self):
        """Return the ``douyu_match_pattern`` hits in the page at ``self.url``."""
        page = get_content(self.url)
        hits = matchall(page, douyu_match_pattern)
        return hits

Example #48

0

Show file

    def prepare(self):
        """Resolve the video ids and collect every playable stream.

        ``self.vid`` is unpacked as a ``(tvid, vid)`` pair.  It is taken from a
        ``curid=<tvid>_<vid>`` fragment of ``self.url`` when present, otherwise
        scraped from the page.  Streams are then requested from three sources
        in order, each one used only if the previous raised: ``gettmts``,
        ``getvps`` and finally ``getdash``.

        Returns:
            The ``VideoInfo`` with ``stream_types`` ordered by ``self.ids``.

        Raises:
            AssertionError: when no ids could be determined, when the last
                fallback reports a code other than ``A00000``, or when no
                stream was collected.
        """
        info = VideoInfo(self.name)

        if self.url and not self.vid:
            vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
            if vid:
                self.vid = vid[0]
                info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + self.vid[
                    0]
                try:
                    info_json = json.loads(get_content(info_u))
                    info.title = info_json['data']['vn']
                except Exception:
                    # Title lookup failed: drop the ids so that the page
                    # itself is parsed below.
                    self.vid = None

        def get_vid():
            # Scrape ids and title from the page; may follow one embedded
            # www.iqiyi.com/v_*.html link and retry on that page.
            html = get_content(self.url)
            video_info = match1(html, ":video-info='(.+?)'")

            if video_info:
                video_info = json.loads(video_info)
                self.vid = str(video_info['tvId']), str(video_info['vid'])
                info.title = video_info['name']

            else:
                tvid = match1(html, r'tvId:\s*"([^"]+)',
                              'data-video-tvId="([^"]+)',
                              r'''\['tvid'\]\s*=\s*"([^"]+)''',
                              r'"tvId":\s*([^,]+)')
                videoid = match1(html, 'data-video-vid="([^"]+)',
                                 r'''vid"?'?\]?\s*(?:=|:)\s*"?'?([^"',]+)''')
                if not (tvid and videoid):
                    url = match1(html, r'(www\.iqiyi\.com/v_\w+\.html)')
                    if url:
                        self.url = 'https://' + url
                        return get_vid()
                self.vid = (tvid, videoid)
                # A missing <title> must not mask the assertion below.
                title = match1(html, '<title>([^<]+)')
                if title:
                    info.title = title.split('-')[0]

        if self.url and not self.vid:
            get_vid()
        tvid, vid = self.vid
        assert tvid and vid, 'can\'t play this video!!'

        def push_stream_vd(vs):
            # Register one m3u8 stream described by a tmts entry.
            vd = vs['vd']
            stream = self.vd_2_id[vd]
            if stream in info.streams:
                # prefer H264 than H265
                if vs.get('fileFormat') == 'H265':
                    return
            else:
                info.stream_types.append(stream)
            m3u8 = vs['m3utx']
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': 'm3u8',
                'src': [m3u8],
                'size': 0
            }

        def push_stream_bid(bid, container, fs_array, size):
            # Register one segmented stream; every segment needs an extra
            # request to obtain its real download URL.  ``url_prefix`` is
            # read from the enclosing scope and is assigned by the fallback
            # branches before they call this helper.
            stream = self.vd_2_id[bid]
            if stream in info.streams:
                return
            real_urls = []
            for seg_info in fs_array:
                url = url_prefix + seg_info['l']
                json_data = json.loads(get_content(url))
                down_url = json_data['l']
                real_urls.append(down_url)
            info.stream_types.append(stream)
            stream_profile = self.id_2_profile[stream]
            info.streams[stream] = {
                'video_profile': stream_profile,
                'container': container,
                'src': real_urls,
                'size': size
            }

        try:
            # try use tmts first
            # less http requests, get results quickly
            tmts_data = gettmts(tvid, vid)
            self.logger.debug('tmts_data:\n' + str(tmts_data))
            assert tmts_data['code'] == 'A00000', 'can\'t play this video!!'
            vs_array = tmts_data['data']['vidl']
            for vs in vs_array:
                push_stream_vd(vs)
            vip_conf = tmts_data['data'].get('ctl', {}).get('configs')
            if vip_conf:
                # prefer H264 than H265
                for vds in (('5', '18'), ('10', '19')):
                    for vd in vds:
                        if vd in vip_conf:
                            tmts_data = gettmts(tvid, vip_conf[vd]['vid'])
                            if tmts_data['code'] == 'A00000':
                                push_stream_vd(tmts_data['data'])
                                break

        except Exception:
            # Any failure above (including the assertion) triggers the
            # fallbacks; KeyboardInterrupt / SystemExit are not swallowed.
            try:
                # use vps as preferred fallback
                vps_data = getvps(tvid, vid)
                self.logger.debug('vps_data:\n' + str(vps_data))
                assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
                url_prefix = vps_data['data']['vp']['du']
                vs_array = vps_data['data']['vp']['tkl'][0]['vs']
                for vs in vs_array:
                    bid = vs['bid']
                    fs_array = vs['fs']
                    size = vs['vsize']
                    push_stream_bid(bid, 'flv', fs_array, size)

            except Exception:
                # use dash as fallback
                for bid in (500, 300, 200, 100):
                    dash_data = getdash(tvid, vid, bid)
                    self.logger.debug('dash_data:\n' + str(dash_data))
                    assert dash_data[
                        'code'] == 'A00000', 'can\'t play this video!!'
                    url_prefix = dash_data['data']['dd']
                    streams = dash_data['data']['program']['video']
                    for stream in streams:
                        # Only the first entry carrying segment data is used.
                        if 'fs' in stream:
                            _bid = stream['bid']
                            container = stream['ff']
                            fs_array = stream['fs']
                            size = stream['vsize']
                            push_stream_bid(_bid, container, fs_array, size)
                            break

        assert info.streams, 'can\'t play this video!!'
        info.stream_types = sorted(info.stream_types, key=self.ids.index)
        return info

Example #49

0

Show file

File: video.py Project: Yu1984/ykdl

 def download_playlist(self, url, param):
     """Download every video whose id is matched on the page at *url*.

     Each ``matchall`` hit is passed, together with *param*, to
     ``self.download``.
     """
     html = get_content(url)
     # Raw string: same pattern text, without the invalid ``\S`` escape.
     vids = matchall(html, [r'id="([^"]+)\S title'])
     for vid in vids:
         self.download(vid, param)

Example #50

0

Show file

 def get_path_list(self):
     """Return the ``/v/<id>`` link targets matched in the page at ``self.url``.

     Only ``href`` attributes immediately followed by a ``title`` attribute
     are matched.
     """
     link_patterns = ['href=[\'"](/v/[a-zA-Z0-9_]+)[\'"] title=[\'"]']
     return matchall(get_content(self.url), link_patterns)

Example #51

0

Show file

File: le.py Project: togitss/ykdl

    def prepare_list(self):
        """Return the numeric ``vid="..."`` values matched in the page at ``self.url``."""
        html = get_content(self.url)

        # Raw string: same pattern text, without the invalid ``\d`` escape.
        return matchall(html, [r'vid="(\d+)"'])

Example #52

0

Show file

File: video.py Project: Yu1984/ykdl

 def prepare_list(self):
     """Return absolute bilibili URLs for each ``<option value='...'>`` on the page.

     Every matched value is prefixed with ``http://www.bilibili.com``.
     """
     page = get_content(self.url)
     urls = []
     for path in matchall(page, ['<option value=\'([^\']*)\'']):
         urls.append('http://www.bilibili.com'+path)
     return urls

Example #53

0

Show file

 def prepare_list(self):
     """Return the quoted entries of the page's ``video_list=[...]`` array.

     Returns:
         The ``matchall`` hits inside the array, or an empty list when the
         page at ``self.url`` carries no ``video_list``.
     """
     html = get_content(self.url)
     video_list = match1(html, r'video_list=\[([^\]]+)')
     if not video_list:
         # Nothing to scan; avoid handing a non-string to matchall.
         return []
     return matchall(video_list, [r'"([^",]+)'])

Example #54

0

Show file

File: video.py Project: PureTV/ykdl

 def prepare_list(self):
     """Return the ``video_id: <value>`` values matched in the page at ``self.url``."""
     page = get_content(self.url)
     id_patterns = ['video_id: ([^,]+)']
     return matchall(page, id_patterns)

Example #55

0

Show file

File: bangumi.py Project: wwqgtxx/ykdl

 def prepare_list(self):
     """Build episode page URLs from the ``"epList"`` array embedded in the page.

     When the array is found, each ``"id"`` in it becomes a
     ``https://www.bilibili.com/bangumi/play/ep<id>`` URL.
     """
     html = get_content(self.url)
     # Raw strings: same pattern text, without invalid ``\[`` / ``\d`` escapes.
     eplist = match1(html, r'"epList":(\[.+?\])')
     if eplist:
         eplist = matchall(eplist, [r',"id":(\d+),'])
         return ['https://www.bilibili.com/bangumi/play/ep{}'.format(eid) for eid in eplist]