def read_album(url, session=None):
    '''Read a PornHub photo album.

    Returns a dict with:
        'title'  - filename-safe album title (via format_filename)
        'photos' - list of Photo objects in album order
    '''
    soup = downloader.read_soup(url, session=session)
    id_album = re.find('/album/([0-9]+)', url, err='no album id')
    url_json = 'https://www.pornhub.com/album/show_album_json?album={}'.format(id_album)
    # Maps photo id -> photo info; entries are chained via their 'next' key.
    data = downloader.read_json(url_json, url, session=session)

    block = soup.find('div', class_='photoAlbumListBlock')
    href = block.a.attrs['href']
    id_ = re.find('/photo/([0-9]+)', href, err='no photo id')

    # Walk the 'next' chain until it cycles back to a photo already seen.
    # The set mirrors the list so the cycle test is O(1) instead of the
    # original O(n) list scan (quadratic over the whole album).
    ids = [id_]
    seen = {id_}
    while True:
        id_ = data[id_]['next']
        if id_ in seen:
            break
        ids.append(id_)
        seen.add(id_)

    photos = []
    for id_ in ids:
        item = data[id_]
        referer = 'https://www.pornhub.com/photo/{}'.format(id_)
        photos.append(Photo(id_, item['img_large'], referer))

    title = clean_title(soup.find('h1', class_='photoAlbumTitleV2').text)
    info = {}
    info['title'] = format_filename(title, 'album_{}'.format(id_album))
    info['photos'] = photos
    return info
def get_info(url, cw=None):
    '''Scrape an FC2 video page and return {'title': ..., 'videos': [Video]}.'''
    print_ = get_print(cw)
    html = downloader.read_html(url)
    soup = Soup(html)

    title = soup.find('h2', class_='videoCnt_title').text.strip()
    info = {'videos': [], 'title': title}

    id_ = re.find(PATTERN_ID, url, err='no id')
    print_('id: {}'.format(id_))

    # The playlist API requires a per-page access token embedded in the HTML.
    token = re.find(
        r'''window.FC2VideoObject.push\(\[['"]ae['"], *['"](.+?)['"]''',
        html, err='no token')
    print_('token: {}'.format(token))

    url_api = 'https://video.fc2.com/api/v3/videoplaylist/{}?sh=1&fs=0'.format(id_)
    headers = {'X-FC2-Video-Access-Token': token}
    data = downloader.read_json(url_api, url, headers=headers)

    # Prefer the normal-quality stream; fall back to the sample stream.
    playlist = data['playlist']
    url_video = urljoin(url, playlist.get('nq') or playlist['sample'])
    url_thumb = soup.find('meta', {'property': 'og:image'})['content']

    info['videos'].append(Video(url_video, url_thumb, url, title, id_))
    return info
def _call(self, url_api, referer='https://twitter.com', params=None):
    '''Fetch a Twitter API endpoint (relative or absolute) as JSON.'''
    full_url = urljoin('https://api.twitter.com', url_api)
    if params:
        full_url = update_url_query(full_url, params)
    #print('call:', full_url)
    return downloader.read_json(full_url, referer, session=self.session)
def get(self, referer):
    '''Resolve the direct image URL for this post and set self.filename.

    Post JSON layout is described by https://j.nozomi.la/nozomi.js
    '''
    post_id = str(self._id)
    # Post JSON files are sharded by the last digit and the two digits before it.
    url_post = 'https://j.nozomi.la/post/{}/{}/{}.json'.format(
        post_id[-1], post_id[-3:-1], self._id)
    data = downloader.read_json(url_post, referer)
    url = urljoin(referer, data['imageurl'])
    self.filename = '{}{}'.format(self._id, get_ext(url))
    return url
def call(self, url):
    '''Call a Pixiv ajax endpoint and return the JSON 'body' payload.

    Raises:
        HTTPError: for 403/404 client errors (requests context suppressed).
        PixivAPIError: when the API payload reports an error.
        requests.exceptions.HTTPError: re-raised unchanged for other codes.
    '''
    url = urljoin('https://www.pixiv.net/ajax/', url)
    try:
        info = downloader.read_json(url, session=self.session)
    except requests.exceptions.HTTPError as e:
        code = e.response.status_code
        if code in (403, 404):
            # 'from None' hides the requests traceback, matching the intent
            # of the original deferred-raise pattern (raising outside the
            # except block to avoid the chained-exception banner).
            raise HTTPError('{} Client Error'.format(code)) from None
        raise  # bare raise keeps the original traceback intact
    if info['error']:
        raise PixivAPIError(info.get('message'))
    return info['body']
def call(self, url):
    '''Append the stored query parameters to the URL and fetch it as JSON.

    NOTE(review): assumes the URL already contains a query string, since
    every pair is appended with a leading '&' — confirm against callers.
    '''
    query = ''.join(f'&{key}={value}' for key, value in self.params.items())
    return downloader.read_json(url + query, session=self.session)
def get(self, url):
    '''Resolve the downloadable media URL for a PornHub video or gif page.

    Caches the result in self._url and also sets self.title,
    self.filename and self.thumb from the chosen File.
    '''
    cw = self.cw
    session = self.session
    print_ = get_print(cw)
    # Already resolved once; return the cached URL.
    if self._url:
        return self._url

    # Accept both ...viewkey=XXX pages and /embed/XXX pages.
    id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
          re.find(r'/embed/(\w+)', url, re.IGNORECASE, err='no id')
    print_('id: {}'.format(id_))
    if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
        # Normalize embed URLs to the regular viewing page.
        url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))

    # Try the free domain first; on failure (or a locked player) fall back
    # to the premium domain.
    url_test = url.replace('pornhubpremium.com', 'pornhub.com')
    try:
        html = downloader.read_html(url_test, session=session)
        soup = Soup(html)
        if soup.find('div', id='lockedPlayer'):
            print_('Locked player')
            raise Exception('Locked player')
        url = url_test
    except: #3511
        url = url.replace('pornhub.com', 'pornhubpremium.com')
        html = downloader.read_html(url, session=session)
        soup = Soup(html)
    soup = fix_soup(soup, url, session, cw)
    html = soup.html

    # removed
    if soup.find('div', class_='removed'):
        raise Exception('removed')

    gif = soup.find('div', {'id': 'gifImageSection'})
    if gif:
        # --- gif page: the mp4 URL is carried in data-* attributes ---
        print_('GIF')
        id_ = url.split('/gif/')[1]
        id_ = re.findall('[0-9a-zA-Z]+', id_)[0]
        jss = list(gif.children)
        # Find the child element that carries the data-mp4 attribute.
        for js in jss:
            if 'data-mp4' in getattr(js, 'attrs', {}):
                break
        else:
            raise Exception('gif mp4 url not found')
        title = js['data-gif-title']
        url = js['data-mp4']
        url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
        file = File('gif_{}'.format(id_), title, url, url_thumb)
    else:
        # --- regular video page ---
        if id_ is None:
            raise Exception('no id')
        print_('Video')

        # 1968
        #title = j['video_title']
        title = soup.find('h1', class_='title').text.strip()

        video_urls = []          # collected (url, height-or-None) pairs
        video_urls_set = set()   # de-duplication of collected URLs

        def int_or_none(s):
            # Best-effort int conversion; None when not parseable.
            try:
                return int(s)
            except:
                return None

        def url_or_none(url):
            # Return the stripped URL only if it has a plausible scheme.
            if not url or not isinstance(url, str):
                return None
            url = url.strip()
            return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None

        # Primary source: the player's flashvars JSON blob.
        flashvars = json.loads(re.find(r'var\s+flashvars_\d+\s*=\s*({.+?});', html, err='no flashvars'))
        url_thumb = flashvars.get('image_url')
        media_definitions = flashvars.get('mediaDefinitions')
        if isinstance(media_definitions, list):
            for definition in media_definitions:
                if not isinstance(definition, dict):
                    continue
                video_url = definition.get('videoUrl')
                if not video_url or not isinstance(video_url, str):
                    continue
                if video_url in video_urls_set:
                    continue
                video_urls_set.add(video_url)
                video_urls.append(
                    (video_url, int_or_none(definition.get('quality'))))

        def extract_js_vars(webpage, pattern, default=object()):
            # Parse simple "var name = value;" JS assignments into a dict.
            assignments = re.find(pattern, webpage, default=default)
            if not assignments:
                return {}
            assignments = assignments.split(';')
            js_vars = {}

            def remove_quotes(s):
                # Strip one matching pair of surrounding quotes, if any.
                if s is None or len(s) < 2:
                    return s
                for quote in ('"', "'", ):
                    if s[0] == quote and s[-1] == quote:
                        return s[1:-1]
                return s

            def parse_js_value(inp):
                # Resolve '+' concatenations and references to earlier vars.
                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
                if '+' in inp:
                    inps = inp.split('+')
                    return functools.reduce(
                        operator.concat, map(parse_js_value, inps))
                inp = inp.strip()
                if inp in js_vars:
                    return js_vars[inp]
                return remove_quotes(inp)

            for assn in assignments:
                assn = assn.strip()
                if not assn:
                    continue
                assn = re.sub(r'var\s+', '', assn)
                vname, value = assn.split('=', 1)
                js_vars[vname] = parse_js_value(value)
            return js_vars

        def add_video_url(video_url):
            # Append a URL (no height info) if valid and not seen yet.
            v_url = url_or_none(video_url)
            if not v_url:
                return
            if v_url in video_urls_set:
                return
            video_urls.append((v_url, None))
            video_urls_set.add(v_url)

        def parse_quality_items(quality_items):
            # quality_items is a JSON list of {'url': ...} dicts.
            q_items = json.loads(quality_items)
            if not isinstance(q_items, list):
                return
            for item in q_items:
                if isinstance(item, dict):
                    add_video_url(item.get('url'))

        # Fallback: scrape "var media_* / quality_* / qualityItems_*" vars.
        if not video_urls:
            print_('# extract video_urls 2')
            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
            js_vars = extract_js_vars(
                html, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
                default=None)
            if js_vars:
                for key, format_url in js_vars.items():
                    if key.startswith(FORMAT_PREFIXES[-1]):
                        parse_quality_items(format_url)
                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                        add_video_url(format_url)

        if not video_urls and re.search(
                r'<[^>]+\bid=["\']lockedPlayer', html):
            raise Exception('Video is locked')

##        if not video_urls:
##            print_('# extract video_urls 3')
##            js_vars = extract_js_vars(
##                dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
##            add_video_url(js_vars['mediastring'])

        # Also pick up explicit download-button links from the page.
        for mobj in re.finditer(
                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
                html):
            video_url = mobj.group('url')
            if video_url not in video_urls_set:
                video_urls.append((video_url, None))
                video_urls_set.add(video_url)

        # Expand any "/video/get_media" indirection into concrete URLs.
        video_urls_ = video_urls
        video_urls = []
        for video_url, height in video_urls_:
            if '/video/get_media' in video_url:
                print_(video_url)
                medias = downloader.read_json(video_url, session=session)
                if isinstance(medias, list):
                    for media in medias:
                        if not isinstance(media, dict):
                            continue
                        video_url = url_or_none(media.get('videoUrl'))
                        if not video_url:
                            continue
                        height = int_or_none(media.get('quality'))
                        video_urls.append((video_url, height))
                continue
            video_urls.append((video_url, height))

        videos = []
        for video_url, height in video_urls:
            video = {}
            # Height may also be encoded in the URL, e.g. "..._720P_4000K...".
            video['height'] = height or int_or_none(re.find(r'(?P<height>\d+)[pP]?_\d+[kK]', video_url))
            video['quality'] = video['height'] or 0
            video['videoUrl'] = video_url
            ext = get_ext(video_url)
            video['ext'] = ext
            if ext.lower() == '.m3u8':
                # Rank HLS streams just below a direct file of equal height.
                video['quality'] -= 1
            print_('[{}p] {} {}'.format(video['height'], video['ext'], video['videoUrl']))
            videos.append(video)

        if not videos:
            raise Exception('No videos')

        # Pick the best quality not exceeding the configured resolution;
        # if none qualifies, take the lowest available.
        videos = sorted(videos, key=lambda video: video['quality'])
        res = get_resolution()
        videos_good = [video for video in videos if video['quality'] <= res]
        if videos_good:
            video = videos_good[-1]
        else:
            video = videos[0]
        print_('\n[{}p] {} {}'.format(video['height'], video['ext'], video['videoUrl']))

        file = File(id_, title, video['videoUrl'].strip(), url_thumb)

    # Cache and expose the resolved metadata.
    self._url = file.url
    self.title = file.title
    self.filename = file.filename
    self.thumb = file.thumb
    return self._url