Example #1
    def extract(self, **kwargs):
        if not self.streams_sorted:
            # No stream is available
            return

        if 'stream_id' in kwargs and kwargs['stream_id']:
            # Extract the stream
            stream_id = kwargs['stream_id']
            if stream_id not in self.streams \
                    and stream_id not in self.dash_streams:
                log.e('[Error] Invalid video format.')
                log.e('Run \'-i\' command with no specific video format to '
                      'view all available formats.')
                exit(2)
        else:
            # Extract stream with the best quality
            stream_id = self.streams_sorted[0]['itag']

        if stream_id in self.streams:
            src = self.streams[stream_id]['url']
            if self.streams[stream_id]['sig'] is not None:
                sig = self.streams[stream_id]['sig']
                src += '&signature={}'.format(sig)
            elif self.streams[stream_id]['s'] is not None:
                if not hasattr(self, 'js'):
                    self.js = get_content(self.html5player)
                s = self.streams[stream_id]['s']
                sig = self.__class__.decipher(self.js, s)
                src += '&signature={}'.format(sig)

            self.streams[stream_id]['src'] = [src]
            self.streams[stream_id]['size'] = urls_size(
                self.streams[stream_id]['src'])
Example #2
def facebook_download(
    url, output_dir='.', merge=True, info_only=False, **kwargs
):
    html = get_content(url)

    title = match1(html, r'<title id="pageTitle">(.+)</title>')

    if title is None:
        title = url

    sd_urls = list(set([
        unicodize(str.replace(i, '\\/', '/'))
        for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html)
    ]))
    hd_urls = list(set([
        unicodize(str.replace(i, '\\/', '/'))
        for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html)
    ]))
    urls = hd_urls if hd_urls else sd_urls

    # Probe the first URL for the MIME type and extension,
    # then total the size across all parts.
    _type, ext, size = url_info(urls[0], True)
    size = urls_size(urls)

    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls(urls, title, ext, size, output_dir, merge=False)
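
Both examples lean on you-get's small helper functions such as get_content and match1. As a point of reference, a minimal match1-style helper might look like the sketch below; this illustrates the pattern and is not necessarily the project's exact implementation.

import re

def match1(text, *patterns):
    # Return the first capture group of the first matching pattern,
    # or None when nothing matches (simplified sketch).
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    return None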
Example #3
    def __call__(self, url, **kwargs):
        '''
        data = {
            'urls': [],
            'title': '',
            'file_format': '',
            'size': '',
        }
        '''
        data = self.extract(url, **kwargs)

        if not self.need_download:
            return

        file_format = data.get('file_format', 'mp4')
        size = data.get('size')
        urls = data['urls']
        if not size:
            if len(urls) == 1:
                size = url_size(urls[0])
            else:
                size = urls_size(urls)
        print_info(site_info=self.site_info,
                   title=data['title'],
                   type=file_format,
                   size=size)
        if not kwargs['info_only']:
            download_urls(urls=urls,
                          title=data['title'],
                          ext=file_format,
                          total_size=size,
                          **kwargs)
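
An extractor with a __call__ method like this one is driven like a plain function. A hedged usage sketch follows; SimpleExtractor is a hypothetical subclass name, since the concrete class is not shown here.

# SimpleExtractor is hypothetical; any subclass that implements extract()
# and sets need_download / site_info would be called the same way.
extractor = SimpleExtractor()
extractor(
    'https://example.com/video/123',  # placeholder URL
    info_only=True,                   # print metadata only, skip the download
    output_dir='.',
    merge=True,
)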
Example #4
    def extract(self, **kwargs):
        for s in self.streams:
            self.streams[s]['size'] = urls_size(self.streams[s]['src'])

        master_m3u8s = []
        for m in self.master_m3u8:
            master_m3u8s.append(self.master_m3u8[m]['url'])

        master_content = None
        master_url = None

        for master_u in master_m3u8s:
            try:
                master_content = get_content(master_u).split('\n')
            except urllib.error.URLError:
                continue
            else:
                master_url = master_u

        if master_content is None:
            return

        lines = []
        for line in master_content:
            if len(line.strip()) > 0:
                lines.append(line.strip())

        pos = 0
        while pos < len(lines):
            if lines[pos].startswith('#EXT-X-STREAM-INF'):
                patt = r'RESOLUTION=(\d+)x(\d+)'
                hit = re.search(patt, lines[pos])
                if hit is None:
                    # No RESOLUTION attribute; advance the cursor,
                    # or this loop never terminates.
                    pos += 1
                    continue
                # width = hit.group(1)
                height = hit.group(2)

                if height in ('2160', '1440'):
                    m3u8_url = urllib.parse.urljoin(master_url, lines[pos+1])
                    meta = dict(m3u8_url=m3u8_url, container='m3u8')
                    if height == '1440':
                        meta['video_profile'] = '2560x1440'
                    else:
                        meta['video_profile'] = '3840x2160'
                    meta['size'] = 0
                    meta['src'] = general_m3u8_extractor(m3u8_url)
                    self.streams[height+'p'] = meta

                pos += 2
            else:
                pos += 1
        self.streams_sorted = []
        for stream_type in self.stream_types:
            if stream_type['id'] in self.streams:
                item = [('id', stream_type['id'])] + list(
                    self.streams[stream_type['id']].items()
                )
                self.streams_sorted.append(dict(item))
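
The parser above relies on the HLS convention that every #EXT-X-STREAM-INF tag is immediately followed by the URI of its variant playlist, which is why the cursor jumps by two on a hit. A self-contained illustration with sample playlist data (made up for demonstration):

import re

master_content = (
    '#EXTM3U\n'
    '#EXT-X-STREAM-INF:BANDWIDTH=8000000,RESOLUTION=3840x2160\n'
    '2160p/index.m3u8\n'
    '#EXT-X-STREAM-INF:BANDWIDTH=4000000,RESOLUTION=2560x1440\n'
    '1440p/index.m3u8\n'
)

lines = [l.strip() for l in master_content.split('\n') if l.strip()]
pos = 0
while pos < len(lines):
    if lines[pos].startswith('#EXT-X-STREAM-INF'):
        hit = re.search(r'RESOLUTION=(\d+)x(\d+)', lines[pos])
        if hit:
            print(hit.group(2) + 'p ->', lines[pos + 1])
        pos += 2  # the tag and its URI travel as a pair
    else:
        pos += 1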
Example #5
def letvcloud_download_by_vu(vu, uu, title=None, info_only=False, **kwargs):
    argument_dict = {
        'cf': 'flash', 'format': 'json', 'ran': str(int(time.time())),
        'uu': str(uu), 'ver': '2.2', 'vu': str(vu),
    }
    # ALL YOUR BASE ARE BELONG TO US
    sign_key = '2f9d6924b33a165a6d8b5d3d42f4f987'
    str2Hash = ''.join(
        [i + argument_dict[i] for i in sorted(argument_dict)]
    ) + sign_key
    sign = hashlib.md5(str2Hash.encode('utf-8')).hexdigest()
    request_info = urllib.request.Request(
        'http://api.letvcloud.com/gpc.php?{}&sign={}'.format(
            '&'.join(
                ['{}={}'.format(i, argument_dict[i]) for i in argument_dict]
            ),
            sign
        )
    )
    response = urllib.request.urlopen(request_info)
    data = response.read()
    info = json.loads(data.decode('utf-8'))
    type_available = []
    for video_type in info['data']['video_info']['media']:
        type_available.append({
            'video_url': info['data']['video_info']['media'][video_type][
                'play_url'
            ]['main_url'],
            'video_quality': int(
                info['data']['video_info']['media'][video_type][
                    'play_url'
                ]['vtype']
            )
        })
    urls = [base64.b64decode(sorted(
        type_available,
        key=lambda x: x['video_quality'])[-1]['video_url']
    ).decode('utf-8')]
    size = urls_size(urls)
    ext = 'mp4'
    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls(urls, title, ext, size, **kwargs)
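
The API call above is authenticated with a simple signing scheme: sort the parameter names, concatenate each name with its value, append the shared key, and take the MD5 hex digest. Isolated as a small helper, the pattern looks like this (a sketch of the scheme, not letvcloud's official API surface):

import hashlib

def sign_params(params, sign_key):
    # MD5 over the sorted 'namevalue' pairs plus a shared secret.
    str2hash = ''.join(k + params[k] for k in sorted(params)) + sign_key
    return hashlib.md5(str2hash.encode('utf-8')).hexdigest()

# sign_params({'b': '2', 'a': '1'}, 'secret') hashes the string 'a1b2secret'.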
Example #6
    def download(self, **kwargs):
        if 'json_output' in kwargs and kwargs['json_output']:
            json_output.output(self)
        elif 'info_only' in kwargs and kwargs['info_only']:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Display the stream
                stream_id = kwargs['stream_id']
                if 'index' not in kwargs:
                    self.p(stream_id)
                else:
                    self.p_i(stream_id)
            else:
                # Display all available streams
                if 'index' not in kwargs:
                    self.p([])
                else:
                    stream_id = self.streams_sorted[0]['id'] \
                        if 'id' in self.streams_sorted[0] \
                        else self.streams_sorted[0]['itag']
                    self.p_i(stream_id)

        else:
            if 'stream_id' in kwargs and kwargs['stream_id']:
                # Download the stream
                stream_id = kwargs['stream_id']
            else:
                # Download stream with the best quality
                stream_id = self.streams_sorted[0]['id'] \
                    if 'id' in self.streams_sorted[0] \
                    else self.streams_sorted[0]['itag']

            if 'index' not in kwargs:
                self.p(stream_id)
            else:
                self.p_i(stream_id)

            if stream_id in self.streams:
                urls = self.streams[stream_id]['src']
                ext = self.streams[stream_id]['container']
                total_size = self.streams[stream_id]['size']
            else:
                urls = self.dash_streams[stream_id]['src']
                ext = self.dash_streams[stream_id]['container']
                total_size = self.dash_streams[stream_id]['size']

            if not urls:
                log.wtf('[Failed] Cannot extract video source.')

            if ext == 'm3u8':
                ffmpeg_kwargs = {}
                if 'iqiyi' in self.name:
                    # ffmpeg_kwargs['override'] = True
                    # ffmpeg_kwargs['params'] = {
                    #     '-c:a': 'copy', '-bsf:a': 'aac_adtstoasc'
                    # }
                    m3u8_urls = general_m3u8_extractor(urls[0])
                    # FIXME(iawia002): computing the size would take too much time
                    if len(m3u8_urls) <= 100:
                        size = urls_size(m3u8_urls)
                    else:
                        size = float('inf')
                    download_urls(m3u8_urls, self.title, 'mp4', size, **kwargs)
                else:
                    download_url_ffmpeg(urls[0],
                                        self.title,
                                        'mp4',
                                        output_dir=kwargs['output_dir'],
                                        merge=kwargs['merge'],
                                        stream=False,
                                        **ffmpeg_kwargs)
            else:
                headers = copy(config.FAKE_HEADERS)
                if self.ua is not None:
                    headers['User-Agent'] = self.ua
                if self.referer is not None:
                    headers['Referer'] = self.referer
                download_urls(urls,
                              self.title,
                              ext,
                              total_size,
                              headers=headers,
                              output_dir=kwargs['output_dir'],
                              merge=kwargs['merge'],
                              av=stream_id in self.dash_streams)
            if 'caption' not in kwargs or not kwargs['caption']:
                print('Skipping captions or danmaku.')
                return
            for lang in self.caption_tracks:
                filename = '%s.%s.srt' % (get_filename(self.title), lang)
                print('Saving %s ... ' % filename, end="", flush=True)
                srt = self.caption_tracks[lang]
                with open(os.path.join(kwargs['output_dir'], filename),
                          'w',
                          encoding='utf-8') as x:
                    x.write(srt)
                print('Done.')
            if self.danmuku is not None and not dry_run:
                filename = '{}.cmt.xml'.format(get_filename(self.title))
                print('Downloading {} ...\n'.format(filename))
                with open(os.path.join(kwargs['output_dir'], filename),
                          'w',
                          encoding='utf8') as fp:
                    fp.write(self.danmuku)

        keep_obj = kwargs.get('keep_obj', False)
        if not keep_obj:
            self.__init__()
Example #7
def twitter_download(url, info_only=False, **kwargs):
    html = get_content(url)
    screen_name = match1(html, r'data-screen-name="([^"]*)"') or \
        match1(html, r'<meta name="twitter:title" content="([^"]*)"')
    item_id = match1(html, r'data-item-id="([^"]*)"') or \
        match1(html, r'<meta name="twitter:site:id" content="([^"]*)"')
    page_title = '{} [{}]'.format(screen_name, item_id)

    try:  # extract images
        urls = re.findall(
            r'property="og:image"\s*content="([^"]+:large)"', html
        )
        assert urls
        images = []
        for url in urls:
            url = ':'.join(url.split(':')[:-1]) + ':orig'
            filename = parse.unquote(url.split('/')[-1])
            title = '.'.join(filename.split('.')[:-1])
            ext = url.split(':')[-2].split('.')[-1]
            size = int(get_head(url)['Content-Length'])
            images.append({
                'title': title,
                'url': url,
                'ext': ext,
                'size': size
            })
        size = sum([image['size'] for image in images])
        print_info(site_info, page_title, images[0]['ext'], size)

        if not info_only:
            for image in images:
                title = image['title']
                ext = image['ext']
                size = image['size']
                url = image['url']
                print_info(site_info, title, ext, size)
                download_urls([url], title, ext, size, **kwargs)

    except Exception:  # extract video
        # always use i/cards or videos url
        if not re.match(r'https?://twitter.com/i/', url):
            url = match1(
                html, r'<meta\s*property="og:video:url"\s*content="([^"]+)"'
            )
            if not url:
                url = 'https://twitter.com/i/videos/{}'.format(item_id)
            html = get_content(url)

        data_config = match1(html, r'data-config="([^"]*)"') or \
            match1(html, r'data-player-config="([^"]*)"')
        i = json.loads(unescape(data_config))
        if 'video_url' in i:
            source = i['video_url']
            item_id = i['tweet_id']
            page_title = "{} [{}]".format(screen_name, item_id)
        elif 'playlist' in i:
            source = i['playlist'][0]['source']
            if not item_id:
                page_title = i['playlist'][0]['contentId']
        elif 'vmap_url' in i:
            vmap_url = i['vmap_url']
            vmap = get_content(vmap_url)
            source = match1(vmap, r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>')
            item_id = i['tweet_id']
            page_title = '{} [{}]'.format(screen_name, item_id)
        elif 'scribe_playlist_url' in i:
            scribe_playlist_url = i['scribe_playlist_url']
            return vine_download(
                scribe_playlist_url, info_only=info_only, **kwargs
            )

        try:
            urls = extract_m3u(source)
        except Exception:
            urls = [source]
        size = urls_size(urls)
        mime, ext = 'video/mp4', 'mp4'

        print_info(site_info, page_title, mime, size)
        if not info_only:
            download_urls(urls, page_title, ext, size, **kwargs)
Example #8
    def extract(self, **kwargs):
        if 'stream_id' in kwargs and kwargs['stream_id']:
            i = kwargs['stream_id']
            if 'size' not in self.streams[i]:
                self.streams[i]['size'] = urls_size(self.streams[i]['src'])
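
Every example on this page ends in urls_size, which totals the byte size of a multi-part download. A minimal stand-in built on HTTP HEAD requests conveys the idea; this approximates the helper's observable behaviour and is not necessarily you-get's actual implementation.

import urllib.request

def url_size(url):
    # Size of a single resource, read from the Content-Length header
    # of a HEAD request.
    request = urllib.request.Request(url, method='HEAD')
    with urllib.request.urlopen(request) as response:
        return int(response.headers.get('Content-Length', 0))

def urls_size(urls):
    # Total size across all parts of a download.
    return sum(url_size(url) for url in urls)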