# Shared imports for the extractor snippets below. `downloader`, `Soup`,
# `Session`, `errors`, `get_print`, and the `re` wrapper providing
# `re.find(..., err=...)` are project-local helpers assumed importable from
# the surrounding repo, as are Video, Image, Page, Album, NotPaidError,
# _get_stream, get_id, header_to_type, AUTH, CACHE_GUEST_TOKEN, and
# TIMEOUT_GUEST_TOKEN.
import json
import os
import hashlib
from datetime import datetime
from random import randrange
from time import time


def get_video(url, session):
    # strip trailing slashes so the last path segment is the video id
    while url.strip().endswith('/'):
        url = url[:-1]
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    params = re.findall(r'VodParameter *= *[\'"]([^\'"]+)[\'"]', html)[0]
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time() * 1000))
    url_xml = 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print(url_xml)
    html = downloader.read_html(url_xml, session=session, referer=url)
    soup = Soup(html)
    title = soup.find('title').string.strip()
    urls_m3u8 = re.findall(r'https?://[^>]+playlist\.m3u8', html)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            stream = _get_stream(url_m3u8)
        except Exception as e:
            print(e)
            continue #2193
        streams.append(stream)
    # merge all remaining streams into the first one
    for stream in streams[1:]:
        streams[0] += stream
    stream = streams[0]
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    video = Video(stream, url, id, title, url_thumb)
    return video

def get_video(url, session, cw):
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session)
    if "document.location.href='https://login." in html:
        raise errors.LoginRequired()
    soup = Soup(html)
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    print_('url_thumb: {}'.format(url_thumb))
    params = re.find(r'VodParameter *= *[\'"]([^\'"]+)[\'"]', html, err='No VodParameter')
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time()*1000))
    url_xml = 'http://stbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print(url_xml)
    html = downloader.read_html(url_xml, session=session, referer=url)
    soup = Soup(html)
    if '<flag>PARTIAL_ADULT</flag>' in html:
        raise errors.LoginRequired()
    title = soup.find('title').string.strip()
    urls_m3u8 = re.findall(r'https?://[^>]+playlist\.m3u8', html)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            stream = _get_stream(url_m3u8)
        except Exception as e:
            print(e)
            continue #2193
        streams.append(stream)
    for stream in streams[1:]:
        streams[0] += stream
    stream = streams[0]
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    video = Video(stream, url, id, title, url_thumb)
    return video

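# Hedged usage sketch for the AfreecaTV extractor directly above. The VOD URL
# is a made-up example and `cw` is None when there is no progress window; the
# default argument pins this `get_video` binding, since the name is shadowed
# by later definitions in this file.
def _demo_afreecatv(session, _get_video=get_video):
    video = _get_video('https://vod.afreecatv.com/player/12345678', session, None)
    print(video)
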
def _guest_token(session, headers, cache=True):
    global CACHE_GUEST_TOKEN
    token = None
    if cache:
        if CACHE_GUEST_TOKEN and time() - CACHE_GUEST_TOKEN[1] < TIMEOUT_GUEST_TOKEN:
            token = CACHE_GUEST_TOKEN[0]
    if token is None:
        print('!!! get guest_token')
        name = 'x-guest-token'
        if name in headers:
            del headers[name]
        r = session.post('https://api.twitter.com/1.1/guest/activate.json',
                         headers=headers)
        data = json.loads(r.text)
        token = data['guest_token']
        CACHE_GUEST_TOKEN = token, time()
    return token

def get_imgs(page, session, cw):
    print_ = get_print(cw)
    if not downloader.cookiejar.get('PROF', domain='.daum.net') and page.serviceType != 'free': #3314
        raise NotPaidError()
    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    t = int(time())
    soup = Soup(html)
    type_ = header_to_type(header)
    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
    data_raw = downloader.read_html(url_data, session=session, referer=page.url)
    data = json.loads(data_raw)
    if header == 'league_':
        m_type = None
    else:
        m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))
    if m_type == 'chatting':
        # chatting-type episodes only expose images through the mobile viewer
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            img = chat.get('image')
            if not img:
                continue
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        if not data.get('data'):
            raise NotPaidError()
        imgs = []
        for img in data['data']:
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    return imgs

def get_video(url, session=None):
    if session is None:
        session = Session()
        session.headers['User-Agent'] = downloader.hdr['User-Agent']
    session.headers['X-Directive'] = 'api'
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    for script in soup.findAll('script'):
        script = script.text or script.string or ''
        data = re.find('window.__NUXT__=(.+)', script)
        if data is not None:
            data = data.strip()
            if data.endswith(';'):
                data = data[:-1]
            data = json.loads(data)
            break
    else:
        raise Exception('No __NUXT__')
    info = data['state']['data']['video']['hentai_video']
    query = info['slug']
    #url_api = 'https://members.hanime.tv/api/v3/videos_manifests/{}?'.format(query) # old
    url_api = 'https://hanime.tv/rapi/v7/videos_manifests/{}?'.format(query) # new
    print(url_api)
    hdr = {
        'x-signature': ''.join('{:x}'.format(randrange(16)) for i in range(32)),
        'x-signature-version': 'web2',
        'x-time': str(int(time())),
    }
    r = session.get(url_api, headers=hdr)
    print(r)
    data = json.loads(r.text)
    streams = []
    for server in data['videos_manifest']['servers']:
        streams += server['streams']
    streams_good = []
    for stream in streams:
        url_video = stream['url']
        if not url_video or 'deprecated.' in url_video:
            continue
        streams_good.append(stream)
    if not streams_good:
        raise Exception('No video available')
    print('len(streams_good):', len(streams_good))
    for stream in streams_good:
        print(stream['extension'], stream['width'], stream['filesize_mbs'], stream['url'])
    stream = streams_good[0]
    return Video(info, stream), session

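# Hedged usage sketch for the hanime.tv extractor above. The slug URL is a
# made-up example; with session=None the function builds its own Session and
# returns it alongside the chosen stream.
def _demo_hanime():
    video, session = get_video('https://hanime.tv/videos/hentai/example-slug')
    print(video)
    return session
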
def __init__(self, session, cw=None):
    self.session = session
    self.cw = cw
    csrf = session.cookies.get('ct0', domain='.twitter.com')
    print('csrf:', csrf)
    if not csrf:
        # no ct0 cookie yet; synthesize a csrf token
        csrf = hashlib.md5(str(time()).encode()).hexdigest()
    hdr = {
        "authorization": AUTH,
        "x-twitter-client-language": "en",
        "x-twitter-active-user": "******",
        "x-csrf-token": csrf,
        "Origin": "https://twitter.com",
    }
    session.headers.update(hdr)
    session.cookies.set('ct0', csrf, domain='.twitter.com')
    if session.cookies.get("auth_token", domain=".twitter.com"):
        session.headers["x-twitter-auth-type"] = "OAuth2Session"
    else:
        # guest token
        guest_token = _guest_token(session, session.headers)
        session.headers["x-guest-token"] = guest_token
        session.cookies.set("gt", guest_token, domain=".twitter.com")
    self.params = {
        "include_profile_interstitial_type": "1",
        "include_blocking": "1",
        "include_blocked_by": "1",
        "include_followed_by": "1",
        "include_want_retweets": "1",
        "include_mute_edge": "1",
        "include_can_dm": "1",
        "include_can_media_tag": "1",
        "skip_status": "1",
        "cards_platform": "Web-12",
        "include_cards": "1",
        "include_composer_source": "true",
        "include_ext_alt_text": "true",
        "include_reply_count": "1",
        "tweet_mode": "extended",
        "include_entities": "true",
        "include_user_entities": "true",
        "include_ext_media_color": "true",
        "include_ext_media_availability": "true",
        "send_error_codes": "true",
        "simple_quoted_tweet": "true",
        #"count": "20",
        "count": "100",
        #"cursor": None,
        "ext": "mediaStats%2ChighlightedLabel%2CcameraMoment",
        "include_quote_count": "true",
    }

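# Hedged usage sketch: the __init__ above is assumed to belong to a client
# class, called TwitterAPI here (a hypothetical name; the class statement is
# not part of this snippet). A cookie-less Session exercises the guest branch.
def _demo_twitter_client():
    api = TwitterAPI(Session())
    print(api.session.headers.get('x-guest-token'))
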
def _guest_token(session, headers, cache=True, cw=None):
    global CACHE_GUEST_TOKEN
    print_ = get_print(cw)
    token = None
    if cache:
        if CACHE_GUEST_TOKEN and time() - CACHE_GUEST_TOKEN[1] < TIMEOUT_GUEST_TOKEN:
            token = CACHE_GUEST_TOKEN[0]
    if token is None:
        print('!!! get guest_token')
        name = 'x-guest-token'
        if name in headers:
            del headers[name]
        r = session.post('https://api.twitter.com/1.1/guest/activate.json',
                         headers=headers)
        data = json.loads(r.text)
        token = data['guest_token']
        print_('token type: {}'.format(type(token)))
        if isinstance(token, int): #3525
            token = str(token)
        CACHE_GUEST_TOKEN = token, time()
    return token

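# Standalone sketch of the guest-token handshake performed above, written
# against plain `requests` for clarity; `bearer` is a placeholder, not a real
# authorization value.
def fetch_guest_token(bearer):
    import requests  # local import; only this sketch needs it
    r = requests.post('https://api.twitter.com/1.1/guest/activate.json',
                      headers={'authorization': bearer})
    token = json.loads(r.text)['guest_token']
    if isinstance(token, int): # the endpoint sometimes returns an int (#3525)
        token = str(token)
    return token
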
def get_albums(page):
    # `uid` and `session` are closed over from the enclosing scope
    url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(
        uid, page, int(time() * 1000))
    referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
    html = downloader.read_html(url, referer, session=session)
    j = json.loads(html)
    data = j['data']
    albums = []
    for album in data['album_list']:
        id = album['album_id']
        type = album['type']
        album = Album(id, type)
        albums.append(album)
    return albums

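# Hedged pagination sketch for get_albums() above: the endpoint serves 20
# albums per page, so looping until an empty page collects the full list
# (assuming `uid` and `session` are bound in the enclosing scope).
def _demo_weibo_albums():
    albums = []
    p = 1
    while True:
        chunk = get_albums(p)
        if not chunk:
            break
        albums += chunk
        p += 1
    return albums
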
def get_imgs(page, session, cw):
    print_ = get_print(cw)
    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    t = int(time())
    soup = Soup(html)
    if 'league_' in id:
        type_ = 'leaguetoon'
    else:
        type_ = 'webtoon'
    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
    data_raw = downloader.read_html(url_data, session=session, referer=page.url)
    data = json.loads(data_raw)
    m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))
    if m_type == 'chatting':
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            img = chat.get('image')
            if not img:
                continue
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        imgs = []
        for img in data['data']:
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    return imgs

def get_album_imgs(album, page):
    url = 'https://photo.weibo.com/photos/get_all?uid={}&album_id={}&count=30&page={}&type={}&__rnd={}'.format(
        uid, album.id, page, album.type, int(time() * 1000))
    referer = 'https://photo.weibo.com/{}/talbum/index'.format(uid)
    html = downloader.read_html(url, referer, session=session, timeout=30)
    j = json.loads(html)
    data = j['data']
    imgs = []
    for photo in data['photo_list']:
        host = photo['pic_host']
        name = photo['pic_name']
        id = photo['photo_id']
        timestamp = photo['timestamp']
        date = datetime.fromtimestamp(timestamp)
        t = '{:02}-{:02}-{:02}'.format(date.year % 100, date.month, date.day)
        url = '{}/large/{}'.format(host, name)
        ext = os.path.splitext(name)[1]
        filename = '[{}] {}{}'.format(t, id, ext)
        img = Image(url, filename, timestamp)
        imgs.append(img)
    return imgs

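# The filename scheme above encodes the upload date as [YY-MM-DD] before the
# photo id; a minimal, runnable illustration of that formatting with made-up
# values:
def _demo_weibo_filename(timestamp=1600000000, photo_id='12345', name='pic.jpg'):
    date = datetime.fromtimestamp(timestamp)
    t = '{:02}-{:02}-{:02}'.format(date.year % 100, date.month, date.day)
    return '[{}] {}{}'.format(t, photo_id, os.path.splitext(name)[1])
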
def get_info(url, session):
    referer = url
    header, id = get_id(referer)
    type_ = header_to_type(header)
    info = {}
    ids = set()
    pages = []
    for p in range(1, 1 + 10):
        if p == 1:
            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(
                type_, id, int(time()))
        else:
            if type_ == 'webtoon':
                # only leaguetoon listings are paginated
                break
            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(
                type_, id, p, int(time()))
        print(url)
        info_raw = downloader.read_html(url, referer=referer, session=session)
        _info = json.loads(info_raw)
        webtoon = _info['data'].get('webtoon') or _info['data'].get('leaguetoon')
        if webtoon is None:
            raise Exception('No webtoon')
        if p == 1:
            info['title'] = webtoon['title']
            artists = []
            for artist in webtoon['cartoon']['artists']:
                artist = artist['penName']
                if artist in artists:
                    continue
                artists.append(artist)
            if len(artists) > 1:
                # swap the first two artists
                artists = [artists[1], artists[0]] + artists[2:]
            info['artists'] = artists
        eps = webtoon.get('webtoonEpisodes') or webtoon.get('leaguetoonEpisodes')
        if not eps:
            if p > 1:
                eps = []
            else:
                raise Exception('No eps')
        c = 0
        for ep in eps:
            id_ = ep.get('articleId') or ep.get('id')
            title = ep['title']
            serviceType = 'free' if type_ == 'leaguetoon' else ep['serviceType']
            if type_ == 'leaguetoon':
                url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
            else:
                url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
            if id_ in ids:
                continue
            c += 1
            ids.add(id_)
            page = Page(id_, url, title, serviceType)
            pages.append(page)
        if c == 0:
            # no new episodes on this page; stop paginating
            print('c == 0; break')
            break
    info['pages'] = sorted(pages, key=lambda x: x.id)
    return info

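# Hedged end-to-end sketch for the Daum webtoon extractor: get_info() lists
# episodes as Page objects, get_imgs() fetches each one. The URL is a made-up
# example; paid episodes raise NotPaidError without a subscribed session.
def _demo_daum(session):
    info = get_info('http://webtoon.daum.net/webtoon/view/example', session)
    print(info['title'], info['artists'])
    for page in info['pages']:
        try:
            imgs = get_imgs(page, session, None)
        except NotPaidError:
            continue
        print(page.title, len(imgs))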