def naver_download_by_url(url, info_only=False, **kwargs):
    ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
    page = get_content(url)
    og_video_url = re.search(
        r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page
    ).group(1)
    params_dict = urllib.parse.parse_qs(
        urllib.parse.urlparse(og_video_url).query
    )
    vid = params_dict['vid'][0]
    key = params_dict['outKey'][0]

    meta_str = get_content(ep.format(vid, key))
    meta_json = json.loads(meta_str)
    if 'errorCode' in meta_json:
        log.wtf(meta_json['errorCode'])
    title = meta_json['meta']['subject']
    videos = meta_json['videos']['list']
    video_list = sorted(
        videos, key=lambda video: video['encodingOption']['width']
    )
    video_url = video_list[-1]['source']

    size = url_size(video_url)
    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls([video_url], title, 'mp4', size, **kwargs)
def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    title = unescape_html('|'.join(
        match1(html, r'<title>(.*?)</title>').split('|')[:-2]))

    # mgid%3Auma%3Avideo%3Amtv81.com%3A897974
    vid = match1(html, r'getTheVideo\("(.*?)"')
    xml = parseString(
        get_content(
            'http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&'
            'flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456'.format(vid)))

    url = sorted(
        map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")),
        key=lambda x: int(match1(x, r'_(\d+?)_')))[-1]

    mediatype, ext, size = 'mp4', 'mp4', 0
    print_info(site_info, title, mediatype, size)

    # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf  # noqa
    # because rtmpdump is unstable, it may take several tries
    if not info_only:
        download_rtmp_url(
            url=url, title=title, ext=ext, params={
                '--swfVfy': (
                    'http://media.mtvnservices.com/player/prime/mediaplayer'
                    'prime.1.10.8.swf')
            }, output_dir=output_dir)
def videomega_download(url, info_only=False, **kwargs):
    # Hot-plug cookie handler
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    opener.addheaders = [('Referer', url), ('Cookie', 'noadvtday=0')]
    request.install_opener(opener)

    if re.search(r'view\.php', url):
        php_url = url
    else:
        content = get_content(url)
        m = re.search(
            r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
        ref = m.group(1)
        width, height = m.group(2), m.group(3)
        php_url = (
            'http://videomega.tv/view.php?ref={}&width={}&height={}'.format(
                ref, width, height))
    content = get_content(php_url)

    title = match1(content, r'<title>(.*)</title>')
    js = match1(content, r'(eval.*)')
    t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
    t = re.sub(r'(\w)', r'{\1}', t)
    t = t.translate({87 + i: str(i) for i in range(10, 36)})
    s = match1(js, r"'([^']+)'\.split").split('|')
    src = t.format(*s)

    _type, ext, size = url_info(src)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([src], title, ext, size, **kwargs)
def douyutv_video_download(url, output_dir='.', merge=True, info_only=False,
                           **kwargs):
    ep = 'http://vmobile.douyu.com/video/getInfo?vid='
    patt = r'show/([0-9A-Za-z]+)'
    title_patt = r'<h1>(.+?)</h1>'

    hit = re.search(patt, url)
    if hit is None:
        log.wtf('Unknown url pattern')
    vid = hit.group(1)

    page = get_content(url)
    hit = re.search(title_patt, page)
    if hit is None:
        title = vid
    else:
        title = hit.group(1)

    meta = json.loads(get_content(ep + vid))
    if meta['error'] != 0:
        log.wtf('Error from API server')
    m3u8_url = meta['data']['video_url']
    print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url)
    if not info_only:
        urls = general_m3u8_extractor(m3u8_url)
        download_urls(urls, title, 'ts', 0, output_dir=output_dir,
                      merge=merge, **kwargs)
def live_entry(self, **kwargs):
    # Extract room ID from the short display ID (seen in the room
    # URL). The room ID is usually the same as the short ID, but not
    # always; case in point: https://live.bilibili.com/48, with 48
    # as the short ID and 63727 as the actual ID.
    room_short_id = re.search(
        r'live.bilibili.com/([^?]+)', self.url
    ).group(1)
    room_init_api_response = json.loads(get_content(
        self.live_room_init_api_url.format(room_short_id)
    ))
    self.room_id = room_init_api_response['data']['room_id']
    room_info_api_response = json.loads(get_content(
        self.live_room_info_api_url.format(self.room_id)
    ))
    self.title = room_info_api_response['data']['title']

    api_url = self.live_api.format(self.room_id)
    json_data = json.loads(get_content(api_url))
    urls = [json_data['durl'][0]['url']]

    self.streams['live'] = {}
    self.streams['live']['src'] = urls
    self.streams['live']['container'] = 'flv'
    self.streams['live']['size'] = 0
def prepare(self, **kwargs):
    if self.url and not self.vid:
        if not re.match(r'http://v.pptv.com/show/(\w+)\.html', self.url):
            raise Exception('Unknown url pattern')
        page_content = get_content(self.url)
        self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
        if not self.vid:
            raise Exception('Cannot find id')

    self.referer = self.url
    api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
    api_url += (
        '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type='
        '&version=4')
    dom = parseString(get_content(api_url))
    self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
    xml_streams = merge_meta(m_items, m_streams, m_segs)

    for stream_id in xml_streams:
        stream_data = xml_streams[stream_id]
        src = make_url(stream_data)
        self.streams[stream_id] = {
            'container': 'mp4',
            'video_profile': stream_data['res'],
            'size': int(stream_data['size']),
            'src': src
        }
def nicovideo_download(url, info_only=False, **kwargs):
    import ssl
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    request.install_opener(opener)

    import netrc
    import getpass
    try:
        info = netrc.netrc().authenticators('nicovideo')
    except Exception:
        info = None
    if info is None:
        # No stored credentials; prompt for them interactively
        user = input('User: ')
        password = getpass.getpass('Password: ')
    else:
        user, password = info[0], info[2]
    print('Logging in...')
    nicovideo_login(user, password)

    html = get_content(url)  # necessary!
    title = match1(html, r'<title>(.+?)</title>')

    vid = url.split('/')[-1].split('?')[0]
    api_html = get_content(
        'http://flapi.nicovideo.jp/api/getflv?v={}'.format(vid))
    real_url = parse.unquote(match1(api_html, r'url=([^&]+)&'))

    _type, ext, size = url_info(real_url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, **kwargs)
def xiami_download_mv(url, output_dir='.', merge=True, info_only=False):
    # FIXME: broken merge
    page = get_content(url)
    title = re.findall('<title>([^<]+)', page)[0]
    vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0]
    api_url = (
        'http://cloud.video.taobao.com/videoapi/info.php?vid={}&uid={}'.format(
            vid, uid))
    result = get_content(api_url)
    doc = parseString(result)
    video_url = doc.getElementsByTagName('video_url')[-1].firstChild.nodeValue
    length = int(doc.getElementsByTagName('length')[-1].firstChild.nodeValue)

    v_urls = []
    k_start = 0
    total_size = 0
    while True:
        k_end = k_start + 20000000
        if k_end >= length:
            k_end = length - 1
        v_url = video_url + '/start_{}/end_{}/1.flv'.format(k_start, k_end)
        try:
            _, ext, size = url_info(v_url)
        except Exception:
            break
        v_urls.append(v_url)
        total_size += size
        k_start = k_end + 1

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(v_urls, title, ext, total_size, output_dir, merge=merge)
def xiami_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    # albums
    if re.match(r'http://www.xiami.com/album/\d+', url):
        _id = match1(url, r'http://www.xiami.com/album/(\d+)')
        xiami_download_album(_id, output_dir, info_only)
    elif re.match(r'http://www.xiami.com/album/\w+', url):
        page = get_content(url)
        album_id = re.search(
            r'rel="canonical"\s+href="http://www.xiami.com/album/([^"]+)"',
            page).group(1)
        xiami_download_album(album_id, output_dir, info_only)

    # collections
    if re.match(r'http://www.xiami.com/collect/\d+', url):
        _id = match1(url, r'http://www.xiami.com/collect/(\d+)')
        xiami_download_showcollect(_id, output_dir, info_only)

    # single track
    if re.match(r'http://www.xiami.com/song/\d+\b', url):
        _id = match1(url, r'http://www.xiami.com/song/(\d+)')
        xiami_download_song(_id, output_dir, info_only)
    elif re.match(r'http://www.xiami.com/song/\w+', url):
        html = get_content(url)
        _id = match1(
            html, r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"')
        xiami_download_song(_id, output_dir, info_only)

    if re.match(r'http://www.xiami.com/song/detail/id/\d+', url):
        _id = match1(url, r'http://www.xiami.com/song/detail/id/(\d+)')
        xiami_download_song(_id, output_dir, info_only)

    if re.match(r'http://www.xiami.com/mv', url):
        xiami_download_mv(url, output_dir, merge=merge, info_only=info_only)
def prepare(self, **kwargs):
    headers = FAKE_HEADERS.copy()
    if 'referer' in kwargs:
        headers['Referer'] = kwargs['referer']

    try:
        page = get_content('https://vimeo.com/{}'.format(self.vid))
        cfg_patt = r'clip_page_config\s*=\s*(\{.+?\});'
        cfg = json.loads(match1(page, cfg_patt))
        video_page = get_content(
            cfg['player']['config_url'], headers=headers
        )
        self.title = cfg['clip']['title']
        info = json.loads(video_page)
    except Exception as e:
        page = get_content('https://player.vimeo.com/video/{}'.format(
            self.vid
        ))
        self.title = match1(page, r'<title>([^<]+)</title>')
        info = json.loads(match1(page, r'var t=(\{.+?\});'))

    plain = info['request']['files']['progressive']
    for s in plain:
        meta = dict(src=[s['url']], container='mp4')
        meta['video_profile'] = '{}x{}'.format(s['width'], s['height'])
        for stream in self.__class__.stream_types:
            if s['quality'] == stream['id']:
                self.streams[s['quality']] = meta
    self.master_m3u8 = info['request']['files']['hls']['cdns']
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    video_id = match1(url, r'vine.co/v/([^/]+)')
    title = match1(html, r'<title>([^<]*)</title>')
    stream = match1(
        html, r'<meta property="twitter:player:stream" content="([^"]*)">'
    )
    if not stream:  # https://vine.co/v/.../card
        stream = match1(html, r'"videoUrl":"([^"]+)"')
        if stream:
            stream = stream.replace('\\/', '/')
        else:
            posts_url = 'https://archive.vine.co/posts/{}.json'.format(
                video_id
            )
            json_data = json.loads(get_content(posts_url))
            stream = json_data['videoDashUrl']
            title = json_data['description']
            if title == '':
                title = '{}_{}'.format(
                    json_data['username'].replace(' ', '_'), video_id
                )

    mime, ext, size = url_info(stream)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([stream], title, ext, size, output_dir, merge=merge)
def extract_m3u(source):
    r1 = get_content(source)
    s1 = re.findall(r'(/ext_tw_video/.*)', r1)
    s1 += re.findall(r'(/amplify_video/.*)', r1)
    r2 = get_content('https://video.twimg.com{}'.format(s1[-1]))
    s2 = re.findall(r'(/ext_tw_video/.*)', r2)
    s2 += re.findall(r'(/amplify_video/.*)', r2)
    return ['https://video.twimg.com{}'.format(i) for i in s2]
def sohu_download(url, info_only=False, **kwargs):
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = match1(url, r'id=(\d+)')
    else:
        html = get_content(url)
        vid = match1(html, r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?')
    assert vid

    if re.match(r'http[s]://tv.sohu.com/', url):
        info = json.loads(get_content(
            'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(vid)))
        for qtyp in ['oriVid', 'superVid', 'highVid', 'norVid', 'relativeId']:
            if 'data' in info:
                hqvid = info['data'][qtyp]
            else:
                hqvid = info[qtyp]
            if hqvid != 0 and hqvid != vid:
                info = json.loads(get_content(
                    'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(
                        hqvid)))
                if 'allot' not in info:
                    continue
                break
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(data['clipsBytes'])
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, hqvid, tvid, new, clipURL, ck))
    else:
        info = json.loads(get_content(
            'http://my.tv.sohu.com/play/videonew.do?vid={}&referer='
            'http://my.tv.sohu.com'.format(vid)))
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(map(int, data['clipsBytes']))
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, vid, tvid, new, clipURL, ck))

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, refer=url, **kwargs)
def qq_download_by_vid(vid, title, output_dir='.', merge=True,
                       info_only=False):
    info_api = ('http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
                '&platform=11&defnpayver=1&vid={}'.format(vid))
    info = get_content(info_api)
    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
    fn_pre = video_json['vl']['vi'][0]['lnk']
    title = video_json['vl']['vi'][0]['ti']
    host = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
    streams = video_json['fl']['fi']
    seg_cnt = video_json['vl']['vi'][0]['cl']['fc']
    if seg_cnt == 0:
        seg_cnt = 1

    # best_quality = streams[-1]['name']
    part_format_id = streams[-1]['id']

    part_urls = []
    total_size = 0
    for part in range(1, seg_cnt + 1):
        filename = '{}.p{}.{}.mp4'.format(
            fn_pre, str(part_format_id % 10000), str(part))
        key_api = ('http://vv.video.qq.com/getkey?otype=json&platform=11&'
                   'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
                       part_format_id, vid, filename))
        part_info = get_content(key_api)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
        if key_json.get('key') is None:
            vkey = video_json['vl']['vi'][0]['fvkey']
            url = '{}{}?vkey={}'.format(
                video_json['vl']['vi'][0]['ul']['ui'][0]['url'],
                fn_pre + '.mp4', vkey)
        else:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)
        if not vkey:
            if part == 1:
                log.wtf(key_json['msg'])
            else:
                log.w(key_json['msg'])
                break
        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(part_urls, title, ext, total_size,
                      output_dir=output_dir, merge=merge)
def pixnet_download(url, info_only=False, **kwargs):
    if not re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url):
        log.wtf('[Failed] Unsupported URL pattern.')
        return

    # http://eric6513.pixnet.net/album/video/206644535
    html = get_content(url)
    title = ''.join(match1(
        html, r'<meta property="og:description\" content="([^"]*)"'
    ).split('-')[1:]).strip()

    time_now = int(time())
    m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url)
    username = m.group(1)  # eric6513
    _id = m.group(2)  # 206644535

    data_dict = {
        'username': username,
        'autoplay': 1,
        'id': _id,
        'loop': 0,
        'profile': 9,
        'time': time_now,
    }
    # have to be like this
    data_dict_str = quote(str(data_dict).replace("'", '"'), safe='"')
    url2 = 'http://api.pixnet.tv/content?type=json&customData={}'.format(
        data_dict_str
    )
    # &sig=edb07258e6a9ff40e375e11d30607983 can be blank for now
    # if required, can be obtained from url like
    # http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js
    # http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7}

    video_json = get_content(url2)
    content = json.loads(video_json)
    url_main = content['element']['video_url']
    url_backup = content['element']['backup_video_uri']

    try:
        # In some rare cases the main URL is IPv6 only...
        # Something like #611
        url_info(url_main)
        url = url_main
    except Exception:
        url = url_backup

    _type, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, **kwargs)
def iwara_download(url, info_only=False, **kwargs):
    video_hash = match1(url, r'http://\w+.iwara.tv/videos/(\w+)')
    video_url = match1(url, r'(http://\w+.iwara.tv)/videos/\w+')
    html = get_content(url, headers=headers)
    title = match1(html, r'<title>(.*)</title>')
    api_url = '{}/api/video/{}'.format(video_url, video_hash)
    content = get_content(api_url, headers=headers)
    data = json.loads(content)
    _type, ext, size = url_info(data[0]['uri'], headers=headers)
    down_urls = data[0]['uri']
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([down_urls], title, ext, size, headers=headers, **kwargs)
def get_single_photo_url(url):
    page = get_content(url)
    pid = get_photo_id(url, page)
    title = match1(page, pattern_inline_title)
    if match1(page, pattern_inline_video_mark):
        api_key = get_api_key(page)
        reply = get_content(
            tmpl_api_call_photo_info(api_key, get_photo_id(url, page))
        )
        secret = json.loads(reply)['photo']['secret']
        return get_orig_video_source(api_key, pid, secret), title
    # last match always has the best resolution
    match = match1(page, pattern_inline_img_url)
    return 'https:{}'.format(match.replace('\\', '')), title
def kuwo_download_by_rid(rid, info_only=False, **kwargs):
    html = get_content(
        'http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid='
        'MUSIC_{}'.format(rid))
    title = match1(html, r'<name>(.*)</name>')
    if not title:
        title = rid
    # format=aac|mp3 -> to get aac; format=mp3 -> to get mp3
    url = get_content(
        'http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_{}&'
        'type=convert_url&response=url'.format(rid))
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, **kwargs)
def douyutv_download(url, output_dir='.', merge=True, info_only=False,
                     **kwargs):
    if 'v.douyu.com/show/' in url:
        douyutv_video_download(url, output_dir=output_dir, merge=merge,
                               info_only=info_only, **kwargs)
        return

    url = re.sub(r'[\w.]*douyu.com', 'm.douyu.com', url)
    html = get_content(url)
    room_id_patt = r'room_id\s*:\s*(\d+),'
    room_id = match1(html, room_id_patt)
    if room_id == '0':
        room_id = url[url.rfind('/') + 1:]

    api_url = 'http://www.douyutv.com/api/v1/'
    args = 'room/{}?aid=wp&client_sys=wp&time={}'.format(
        room_id, int(time.time()))
    auth_md5 = (args + 'zNzMV1y4EMxOHS6I5WKm').encode('utf-8')
    auth_str = hashlib.md5(auth_md5).hexdigest()
    json_request_url = '{}{}&auth={}'.format(api_url, args, auth_str)

    content = get_content(json_request_url)
    json_content = json.loads(content)
    data = json_content['data']
    server_status = json_content.get('error', 0)
    if server_status != 0:
        raise ValueError('Server returned error: {}'.format(server_status))

    title = data.get('room_name')
    show_status = data.get('show_status')
    if show_status != '1':
        raise ValueError(
            'The live stream is not online! (Errno: {})'.format(server_status))

    real_url = '{}/{}'.format(data.get('rtmp_url'), data.get('rtmp_live'))

    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_url_ffmpeg(real_url, title, 'flv', None,
                            output_dir=output_dir, merge=merge)
def baomihua_download_by_id(_id, title=None, output_dir='.', merge=True,
                            info_only=False, **kwargs):
    html = get_content(
        'http://play.baomihua.com/getvideourl.aspx?flvid={}&devicetype='
        'phone_app'.format(_id))
    host = match1(html, r'host=([^&]*)')
    assert host
    _type = match1(html, r'videofiletype=([^&]*)')
    assert _type
    vid = match1(html, r'&stream_name=([^&]*)')
    assert vid
    dir_str = match1(html, r'&dir=([^&]*)').strip()
    url = 'http://{}/{}/{}.{}'.format(host, dir_str, vid, _type)
    _, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge,
                      **kwargs)
def xiami_download_song(sid, output_dir='.', info_only=False):
    xml = get_content(
        'http://www.xiami.com/song/playlist/id/{}/object_name/default/'
        'object_id/0'.format(sid))
    doc = parseString(xml)
    i = doc.getElementsByTagName('track')[0]
    artist = i.getElementsByTagName('artist')[0].firstChild.nodeValue
    album_name = i.getElementsByTagName('album_name')[0].firstChild.nodeValue
    song_title = i.getElementsByTagName('name')[0].firstChild.nodeValue
    url = location_dec(
        i.getElementsByTagName('location')[0].firstChild.nodeValue)
    try:
        lrc_url = i.getElementsByTagName('lyric')[0].firstChild.nodeValue
    except Exception:
        pass
    type_, ext, size = url_info(url)
    if not ext:
        ext = 'mp3'

    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = '{} - {} - {}'.format(song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except Exception:
            pass
def sina_zxt(url, info_only=False, **kwargs):
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:  # Yes, they use 1 for success.
        log.wtf(meta['message'])
    title = meta['data']['title']

    videos = sorted(meta['data']['videos'], key=lambda i: int(i['size']))
    if len(videos) == 0:
        log.wtf('No video file returned by API server')
    vid = videos[-1]['file_id']
    container = videos[-1]['type']
    size = int(videos[-1]['size'])

    if container == 'hlv':
        container = 'flv'

    urls, _, _ = video_info(api_req(vid))
    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, **kwargs)
    return
def sina_download(url, info_only=False, **kwargs):
    """Downloads Sina videos by URL.
    """
    if 'news.sina.com.cn/zxt' in url:
        sina_zxt(url, info_only=info_only, **kwargs)
        return

    vid = match1(url, r'vid=(\d+)')
    if vid is None:
        video_page = get_content(url)
        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
        if hd_vid == '0':
            vids = match1(
                video_page, r'[^\w]vid\s*:\s*\'([^\']+)\''
            ).split('|')
            vid = vids[-1]

    if vid is None:
        vid = match1(video_page, r'vid:"?(\d+)"?')
    if vid:
        # title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
        sina_download_by_vid(vid, info_only=info_only, **kwargs)
    else:
        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
        if vkey is None:
            vid = match1(url, r'#(\d+)')
            sina_download_by_vid(vid, info_only=info_only, **kwargs)
            return
        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
        sina_download_by_vkey(vkey, title=title, info_only=info_only, **kwargs)
def panda_download(url, info_only=False, **kwargs):
    roomid = re.search(r'/(\d+)', url)
    if roomid is None:
        log.wtf('Cannot find room id for this url')
    roomid = roomid.group(1)
    json_request_url = (
        'http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}'.format(
            roomid, int(time.time())))
    content = get_content(json_request_url)
    api_json = json.loads(content)

    errno = api_json['errno']
    errmsg = api_json['errmsg']
    if errno:
        raise ValueError('Errno : {}, Errmsg : {}'.format(errno, errmsg))

    data = api_json['data']
    title = data['roominfo']['name']
    room_key = data['videoinfo']['room_key']
    plflag = data['videoinfo']['plflag'].split('_')
    status = data['videoinfo']['status']
    if status != '2':
        raise ValueError(
            'The live stream is not online! (status:{})'.format(status))

    data2 = json.loads(data['videoinfo']['plflag_list'])
    rid = data2['auth']['rid']
    sign = data2['auth']['sign']
    ts = data2['auth']['time']
    real_url = ('http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&'
                'rid={}'.format(plflag[1], room_key, sign, ts, rid))

    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_urls([real_url], title, 'flv', None, **kwargs)
def prepare(self, **kwargs):
    # scrape the html
    content = get_content(self.url)

    # extract title
    self.title = match1(
        content,
        r'<meta property="og:description" name="og:description" '
        r'content="([^"]+)"'
    )

    data = match1(
        content,
        r'<script type="application/json" id=\'initial-state\'>(.+)'
        r'</script>'
    )
    data = json.loads(data)
    keys = list(data['resources']['data']['PinPageResource'].keys())
    orig_img = data['resources']['data']['PinPageResource'][keys[0]][
        'data'
    ]['images']['orig']['url']
    twit_img = match1(
        content,
        r'<meta property="twitter:image:src" name="twitter:image:src" '
        r'content="([^"]+)"'
    )

    # construct available streams
    if orig_img:
        self.streams['original'] = {'url': orig_img}
    if twit_img:
        self.streams['small'] = {'url': twit_img}
def letv_download(url, info_only=False, **kwargs):
    url = url_locations([url])[0]
    if re.match(r'http://yuntv.letv.com/', url):
        letvcloud_download(url, info_only=info_only, **kwargs)
    elif 'sports.le.com' in url:
        html = get_content(url)
        vid = match1(url, r'video/(\d+)\.html')
        title = match1(html, r'<h2 class="title">([^<]+)</h2>')
        letv_download_by_vid(vid, title=title, info_only=info_only, **kwargs)
    else:
        html = get_content(url)
        vid = match1(url, r'http://www.letv.com/ptv/vplay/(\d+).html') or \
            match1(url, r'http://www.le.com/ptv/vplay/(\d+).html') or \
            match1(html, r'vid="(\d+)"')
        title = match1(html, r'name="irTitle" content="(.*?)"')
        letv_download_by_vid(vid, title=title, info_only=info_only, **kwargs)
def extract(self, **kwargs):
    if not self.streams_sorted:
        # No stream is available
        return

    if 'stream_id' in kwargs and kwargs['stream_id']:
        # Extract the stream
        stream_id = kwargs['stream_id']
        if stream_id not in self.streams \
                and stream_id not in self.dash_streams:
            log.e('[Error] Invalid video format.')
            log.e('Run \'-i\' command with no specific video format to '
                  'view all available formats.')
            exit(2)
    else:
        # Extract stream with the best quality
        stream_id = self.streams_sorted[0]['itag']

    if stream_id in self.streams:
        src = self.streams[stream_id]['url']
        if self.streams[stream_id]['sig'] is not None:
            sig = self.streams[stream_id]['sig']
            src += '&signature={}'.format(sig)
        elif self.streams[stream_id]['s'] is not None:
            if not hasattr(self, 'js'):
                self.js = get_content(self.html5player)
            s = self.streams[stream_id]['s']
            sig = self.__class__.decipher(self.js, s)
            src += '&signature={}'.format(sig)

        self.streams[stream_id]['src'] = [src]
        self.streams[stream_id]['size'] = urls_size(
            self.streams[stream_id]['src'])
def extract(self, url, **kwargs):
    if '163.fm' in url:
        url = get_location(url)
    if 'music.163.com' in url:
        self.need_download = False
        self.netease_cloud_music_download(url, **kwargs)
    else:
        html = get_content(url)
        title = match1(html, 'movieDescription=\'([^\']+)\'') or \
            match1(html, '<title>(.+)</title>')

        if title[0] == ' ':
            title = title[1:]

        src = match1(html, r'<source src="([^"]+)"') or \
            match1(html, r'<source type="[^"]+" src="([^"]+)"')

        if src:
            url = src
            _, ext, size = url_info(src)
        else:
            url = (match1(html, r'["\'](.+)-list.m3u8["\']') or
                   match1(html, r'["\'](.+).m3u8["\']')) + '.mp4'
            _, _, size = url_info(url)
            ext = 'mp4'

        return {
            'urls': [url],
            'title': title,
            'file_format': ext,
            'size': size,
        }
def vidto_download(url, info_only=False, **kwargs):
    html = get_content(url)
    params = {}
    r = re.findall(
        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">',
        html
    )
    for name, value in r:
        params[name] = value
    data = parse.urlencode(params).encode('utf-8')
    req = request.Request(url, headers=FAKE_HEADERS)
    print('Please wait for 6 seconds...')
    time.sleep(6)
    print('Starting')
    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
    new_stff = re.search(r'lnk_download" href="(.*?)">', new_html)
    if new_stff:
        url = new_stff.group(1)
        title = params['fname']
        _type = ''
        ext = ''
        a, b, size = url_info(url)
        print_info(site_info, title, _type, size)
        if not info_only:
            download_urls([url], title, ext, size, **kwargs)
    else:
        log.wtf("Can't find link, please review")
def prepare(self, **kwargs):
    if self.url:
        self.vid = self.get_vid_from_url(self.url)
        self.referer = self.url

    content = get_content(self.api_endpoint.format(room_id=self.vid))
    content = json.loads(content)
    self.title = content['data']['room_name']
    rtmp_url = content['data']['rtmp_url']
    # stream_available = [i['name'] for i in content['data']['stream']]
    stream_available = {}
    stream_available['normal'] = '{}/{}'.format(
        rtmp_url, content['data']['rtmp_live']
    )
    if len(content['data']['rtmp_multi_bitrate']) > 0:
        for k, v in content['data']['rtmp_multi_bitrate'].items():
            stream_available[k] = rtmp_url + '/' + v

    for s in self.stream_types:
        if s['id'] in stream_available.keys():
            quality_id = s['id']
            url = stream_available[quality_id]
            self.streams[quality_id] = {
                'container': 'flv',
                'video_profile': s['video_profile'],
                'size': 0,
                'url': url
            }