Esempio n. 1
0
def read_album(url, session=None):
    '''
    Collect every photo of the album page at `url`.

    Reads the album page and the album's show_album_json listing, then
    follows each entry's 'next' id, starting from the first photo linked in
    the page, until an id that was already collected comes back.

    Returns a dict with two keys: 'title' (the result of format_filename)
    and 'photos' (a list of Photo objects in the order they were walked).
    '''
    soup = downloader.read_soup(url, session=session)
    id_album = re.find('/album/([0-9]+)', url, err='no album id')
    url_json = 'https://www.pornhub.com/album/show_album_json?album={}'.format(id_album)
    data = downloader.read_json(url_json, url, session=session)

    # The starting photo is the first link inside the album list block.
    first_href = soup.find('div', class_='photoAlbumListBlock').a.attrs['href']
    current = re.find('/photo/([0-9]+)', first_href, err='no photo id')

    # Walk the 'next' chain; stop as soon as it points at a collected id.
    ids = [current]
    current = data[current]['next']
    while current not in ids:
        ids.append(current)
        current = data[current]['next']

    photos = [
        Photo(
            photo_id,
            data[photo_id]['img_large'],
            'https://www.pornhub.com/photo/{}'.format(photo_id),
        )
        for photo_id in ids
    ]

    heading = soup.find('h1', class_='photoAlbumTitleV2')
    title = clean_title(heading.text)
    return {
        'title': format_filename(title, 'album_{}'.format(id_album)),
        'photos': photos,
    }
Esempio n. 2
0
def get_info(url, cw=None):
    '''
    Build the info dict for the video page at `url`.

    The page supplies the title, the access token and the thumbnail; the
    videoplaylist API (called with the token as a header) supplies the
    stream path, preferring the 'nq' entry and falling back to 'sample'.

    Returns {'videos': [Video], 'title': str}.
    '''
    print_ = get_print(cw)
    html = downloader.read_html(url)
    soup = Soup(html)
    title = soup.find('h2', class_='videoCnt_title').text.strip()
    info = {'videos': [], 'title': title}

    id_ = re.find(PATTERN_ID, url, err='no id')
    print_('id: {}'.format(id_))
    token = re.find(
        r'''window.FC2VideoObject.push\(\[['"]ae['"], *['"](.+?)['"]''',
        html,
        err='no token')
    print_('token: {}'.format(token))

    # The playlist API is queried with the token scraped from the page.
    headers = {'X-FC2-Video-Access-Token': token}
    url_api = 'https://video.fc2.com/api/v3/videoplaylist/{}?sh=1&fs=0'.format(id_)
    data = downloader.read_json(url_api, url, headers=headers)

    playlist = data['playlist']
    stream_path = playlist.get('nq') or playlist['sample']
    url_video = urljoin(url, stream_path)
    url_thumb = soup.find('meta', {'property': 'og:image'})['content']

    info['videos'].append(Video(url_video, url_thumb, url, title, id_))
    return info
 def _call(self, url_api, referer='https://twitter.com', params=None):
     '''
     Read JSON from an API path resolved against https://api.twitter.com.

     When `params` is truthy it is applied to the URL with update_url_query.
     `referer` is forwarded to downloader.read_json as its second argument
     and the request uses self.session.
     '''
     endpoint = urljoin('https://api.twitter.com', url_api)
     endpoint = update_url_query(endpoint, params) if params else endpoint
     return downloader.read_json(endpoint, referer, session=self.session)
Esempio n. 4
0
 def get(self, referer):
     '''
     Look up this post's JSON record and return its image URL.

     The record path is built from the last digit of the id, the two digits
     before it, and the id itself. As a side effect self.filename is set to
     the id followed by the extension of the image URL.
     '''
     # https://j.nozomi.la/nozomi.js
     post_id = str(self._id)
     url_post = 'https://j.nozomi.la/post/{}/{}/{}.json'.format(
         post_id[-1], post_id[-3:-1], self._id)
     record = downloader.read_json(url_post, referer)
     url = urljoin(referer, record['imageurl'])
     self.filename = '{}{}'.format(self._id, get_ext(url))
     return url
Esempio n. 5
0
 def call(self, url):
     '''
     Read an endpoint under https://www.pixiv.net/ajax/ and return its 'body'.

     Raises HTTPError('<code> Client Error') when the request fails with
     status 403 or 404; any other requests HTTPError is re-raised as is.
     Raises PixivAPIError with the response 'message' when the response's
     'error' field is truthy.
     '''
     url = urljoin('https://www.pixiv.net/ajax/', url)
     client_error = None
     try:
         info = downloader.read_json(url, session=self.session)
     except requests.exceptions.HTTPError as e:
         code = e.response.status_code
         if code not in (403, 404):
             raise e
         client_error = HTTPError('{} Client Error'.format(code))
     # The replacement error is raised after the handler has finished,
     # not from inside the except block.
     if client_error:
         raise client_error
     if info['error']:
         raise PixivAPIError(info.get('message'))
     return info['body']
Esempio n. 6
0
 def call(self, url):
     '''
     Append every self.params entry to `url` and return the JSON read from it.

     Each entry is added as '&key=value' in dict order; keys and values are
     inserted as they are, without any escaping.
     '''
     query = ''.join(f'&{key}={value}' for key, value in self.params.items())
     return downloader.read_json(url + query, session=self.session)
Esempio n. 7
0
    def get(self, url):
        '''
        Resolve and cache the direct media URL for a video or GIF page.

        Returns the cached self._url when it is already set. Otherwise the
        page is fetched (the pornhub.com host first, pornhubpremium.com as a
        fallback), the media URL is extracted, and self._url, self.title,
        self.filename and self.thumb are filled from the resulting File.

        Raises Exception when the page is marked removed, when a GIF page
        has no element carrying 'data-mp4', when the video is locked, or
        when no video URL could be extracted.
        '''
        cw = self.cw
        session = self.session
        print_ = get_print(cw)
        if self._url:
            return self._url

        # NOTE(review): if re.find raises whenever `err` is given and nothing
        # matches, a '/gif/' URL without viewkey/embed never reaches the GIF
        # branch below - confirm against the project's re.find.
        id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
              re.find(r'/embed/(\w+)', url, re.IGNORECASE, err='no id')
        print_('id: {}'.format(id_))
        if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
            # Normalize embed-style URLs to the regular watch page.
            url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))

        # Try the regular host first; on any failure (including a locked
        # player) fall back to the premium host.
        url_test = url.replace('pornhubpremium.com', 'pornhub.com')
        try:
            html = downloader.read_html(url_test, session=session)
            soup = Soup(html)
            if soup.find('div', id='lockedPlayer'):
                print_('Locked player')
                raise Exception('Locked player')
            url = url_test
        except Exception: #3511
            # `except Exception` keeps the best-effort fallback but lets
            # KeyboardInterrupt / SystemExit propagate.
            url = url.replace('pornhub.com', 'pornhubpremium.com')
            html = downloader.read_html(url, session=session)

        soup = Soup(html)
        soup = fix_soup(soup, url, session, cw)
        html = soup.html

        # removed
        if soup.find('div', class_='removed'):
            raise Exception('removed')

        gif = soup.find('div', {'id': 'gifImageSection'})
        if gif:
            print_('GIF')
            id_ = url.split('/gif/')[1]
            id_ = re.findall('[0-9a-zA-Z]+', id_)[0]

            # The first child that carries a 'data-mp4' attribute holds both
            # the title and the mp4 URL.
            jss = list(gif.children)
            for js in jss:
                if 'data-mp4' in getattr(js, 'attrs', {}):
                    break
            else:
                raise Exception('gif mp4 url not found')

            title = js['data-gif-title']
            url = js['data-mp4']
            url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
            file = File('gif_{}'.format(id_), title, url, url_thumb)
        else:
            if id_ is None:
                raise Exception('no id')

            print_('Video')

            # 1968
            #title = j['video_title']
            title = soup.find('h1', class_='title').text.strip()

            # (url, height) pairs in discovery order; the set de-duplicates.
            video_urls = []
            video_urls_set = set()

            def int_or_none(s):
                '''Return int(s), or None when s cannot be converted.'''
                try:
                    return int(s)
                except (TypeError, ValueError, OverflowError):
                    return None

            def url_or_none(url):
                '''Return the stripped URL if it is a string with a known scheme (or scheme-relative), else None.'''
                if not url or not isinstance(url, str):
                    return None
                url = url.strip()
                return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None

            # Strategy 1: the flashvars JSON object embedded in the page.
            flashvars  = json.loads(re.find(r'var\s+flashvars_\d+\s*=\s*({.+?});', html, err='no flashvars'))
            url_thumb = flashvars.get('image_url')
            media_definitions = flashvars.get('mediaDefinitions')
            if isinstance(media_definitions, list):
                for definition in media_definitions:
                    if not isinstance(definition, dict):
                        continue
                    video_url = definition.get('videoUrl')
                    if not video_url or not isinstance(video_url, str):
                        continue
                    if video_url in video_urls_set:
                        continue
                    video_urls_set.add(video_url)
                    video_urls.append(
                        (video_url, int_or_none(definition.get('quality'))))

            def extract_js_vars(webpage, pattern, default=object()):
                '''
                Evaluate the simple `var name = value` assignments captured
                by `pattern` and return them as a name -> string dict.
                Values may be quoted literals, earlier variables, or '+'
                concatenations of those.
                '''
                assignments = re.find(pattern, webpage, default=default)
                if not assignments:
                    return {}

                assignments = assignments.split(';')

                js_vars = {}

                def remove_quotes(s):
                    # Strip one pair of matching surrounding quotes, if any.
                    if s is None or len(s) < 2:
                        return s
                    for quote in ('"', "'", ):
                        if s[0] == quote and s[-1] == quote:
                            return s[1:-1]
                    return s

                def parse_js_value(inp):
                    # Drop /* ... */ comments, then resolve concatenations,
                    # previously assigned variables and quoted literals.
                    inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
                    if '+' in inp:
                        inps = inp.split('+')
                        return functools.reduce(
                            operator.concat, map(parse_js_value, inps))
                    inp = inp.strip()
                    if inp in js_vars:
                        return js_vars[inp]
                    return remove_quotes(inp)

                for assn in assignments:
                    assn = assn.strip()
                    if not assn:
                        continue
                    assn = re.sub(r'var\s+', '', assn)
                    vname, value = assn.split('=', 1)
                    js_vars[vname] = parse_js_value(value)
                return js_vars

            def add_video_url(video_url):
                # Record a validated, not-yet-seen URL with unknown height.
                v_url = url_or_none(video_url)
                if not v_url:
                    return
                if v_url in video_urls_set:
                    return
                video_urls.append((v_url, None))
                video_urls_set.add(v_url)

            def parse_quality_items(quality_items):
                # `quality_items` is a JSON list of dicts with a 'url' key.
                q_items = json.loads(quality_items)
                if not isinstance(q_items, list):
                    return
                for item in q_items:
                    if isinstance(item, dict):
                        add_video_url(item.get('url'))

            # Strategy 2: only when flashvars yielded nothing, evaluate the
            # media_*/quality_*/qualityItems_* JS variables.
            if not video_urls:
                print_('# extract video_urls 2')
                FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
                js_vars = extract_js_vars(
                    html, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
                    default=None)
                if js_vars:
                    for key, format_url in js_vars.items():
                        if key.startswith(FORMAT_PREFIXES[-1]):
                            parse_quality_items(format_url)
                        elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                            add_video_url(format_url)
                if not video_urls and re.search(
                        r'<[^>]+\bid=["\']lockedPlayer', html):
                    raise Exception('Video is locked')

            # Download-button links are always added, whatever was found above.
            for mobj in re.finditer(
                    r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
                    html):
                video_url = mobj.group('url')
                if video_url not in video_urls_set:
                    video_urls.append((video_url, None))
                    video_urls_set.add(video_url)

            # Expand '/video/get_media' entries: each is an endpoint that
            # returns a JSON list of the actual media definitions.
            video_urls_ = video_urls
            video_urls = []
            for video_url, height in video_urls_:
                if '/video/get_media' in video_url:
                    print_(video_url)
                    medias = downloader.read_json(video_url, session=session)
                    if isinstance(medias, list):
                        for media in medias:
                            if not isinstance(media, dict):
                                continue
                            video_url = url_or_none(media.get('videoUrl'))
                            if not video_url:
                                continue
                            height = int_or_none(media.get('quality'))
                            video_urls.append((video_url, height))
                    continue
                video_urls.append((video_url, height))

            videos = []
            for video_url, height in video_urls:
                video = {}
                # Fall back to a height parsed from the URL (e.g. '720P_4000K').
                video['height'] = height or int_or_none(re.find(r'(?P<height>\d+)[pP]?_\d+[kK]', video_url))
                video['quality'] = video['height'] or 0
                video['videoUrl'] = video_url
                ext = get_ext(video_url)
                video['ext'] = ext
                # At equal height an .m3u8 entry ranks just below the others.
                if ext.lower() == '.m3u8':
                    video['quality'] -= 1
                print_('[{}p] {} {}'.format(video['height'], video['ext'], video['videoUrl']))
                videos.append(video)

            if not videos:
                raise Exception('No videos')

            videos = sorted(videos, key=lambda video: video['quality'])

            # presumably the user's preferred maximum resolution - confirm
            res = get_resolution()

            # Pick the best entry not exceeding `res`; if every entry
            # exceeds it, take the lowest-quality one.
            videos_good = [video for video in videos if video['quality'] <= res]
            if videos_good:
                video = videos_good[-1]
            else:
                video = videos[0]
            print_('\n[{}p] {} {}'.format(video['height'], video['ext'], video['videoUrl']))

            file = File(id_, title, video['videoUrl'].strip(), url_thumb)

        self._url = file.url
        self.title = file.title
        self.filename = file.filename
        self.thumb = file.thumb
        return self._url