def get_video(url, session, cw):
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session)
    if "document.location.href='https://login." in html:
        raise errors.LoginRequired()
    soup = Soup(html)
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    print_('url_thumb: {}'.format(url_thumb))
    params = re.find('VodParameter *= *[\'"]([^\'"]+)[\'"]', html, err='No VodParameter')
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time()*1000))
    url_xml = 'http://stbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print_(url_xml)
    html = downloader.read_html(url_xml, session=session, referer=url)
    soup = Soup(html)
    if '<flag>PARTIAL_ADULT</flag>' in html:
        raise errors.LoginRequired()
    title = soup.find('title').string.strip()
    urls_m3u8 = re.findall(r'https?://[^>]+playlist\.m3u8', html)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            stream = _get_stream(url_m3u8)
        except Exception as e:
            print(e)
            continue #2193
        streams.append(stream)
    if not streams:
        raise Exception('no valid streams') # avoid IndexError when every m3u8 fails
    # merge the remaining partial streams into the first one
    for stream in streams[1:]:
        streams[0] += stream
    stream = streams[0]
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    video = Video(stream, url, id, title, url_thumb)
    return video
def get_sd(url, session=None, html=None, cw=None, wait=True):
    print_ = get_print(cw)
    if html:
        soup = Soup(html)
        check_error(soup, cw, wait)
        for script in soup.findAll('script'):
            j = get_j(script)
            if j:
                break
        else:
            raise Exception('no _sharedData!!')
    else:
        for try_ in range(4):
            _wait(cw)
            html = read_html(url, session, cw)
            soup = Soup(html)
            check_error(soup, cw, wait)
            for script in soup.findAll('script'):
                j = get_j(script)
                if j:
                    break
            else:
                continue
            break
        else:
            raise Exception('no _sharedData')

    for script in soup.findAll('script'):
        s = script.string
        if s and 'window.__additionalDataLoaded(' in s:
            s = cut_pair(s)
            j_add = json.loads(s)
            try:
                j['entry_data']['PostPage'][0].update(j_add)
            except Exception:
                j['entry_data']['ProfilePage'][0].update(j_add) #2900

    # Challenge
    challenge = j['entry_data'].get('Challenge')
    if challenge:
        try:
            for cont in challenge[0]['extraData']['content']:
                title = cont.get('title')
                if title:
                    break
            else:
                raise Exception('no title')
        except Exception:
            title = 'Err'
        raise errors.LoginRequired(title)

    # LoginAndSignupPage
    login = j['entry_data'].get('LoginAndSignupPage')
    if login:
        raise errors.LoginRequired()

    return j
def get_imgs(url, title, cw=None):
    print_ = get_print(cw)
    imgs = []
    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        soup = read_soup(url)
        view = soup.find('div', class_='photos-list')
        if view is None:
            if p == 1:
                raise errors.LoginRequired()
            else:
                break # Guest user
        for img in view.findAll('img'):
            img = Image(img.attrs['data-src'], url, len(imgs))
            imgs.append(img)
        pgn = soup.find('ul', class_='pagination')
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break
        msg = '{} {} ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)
    return imgs
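# Hedged sketch of the setPage/getPage helpers assumed above (their real
# definitions are not shown in this snippet): setPage rewrites the page number
# in the URL's query string and getPage reads it back. The 'page' parameter
# name is an assumption.
from urllib.parse import urlsplit, urlunsplit, parse_qs, urlencode

def setPage(url, p):
    parts = urlsplit(url)
    qs = parse_qs(parts.query)
    qs['page'] = [str(p)]
    return urlunsplit(parts._replace(query=urlencode(qs, doseq=True)))

def getPage(url):
    qs = parse_qs(urlsplit(url).query)
    return int(qs.get('page', ['1'])[0])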
def get_id(url, cw=None):
    for try_ in range(2):
        try:
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                raise errors.LoginRequired()
            oid = _get_page_id(html)
            if not oid:
                raise Exception('no page_id')
            uids = re.findall(r'uid=([0-9]+)', html)
            uid = max(set(uids), key=uids.count) # most frequent uid on the page
            name = re.find(r"CONFIG\['onick'\]='(.+?)'", html) or soup.find(
                'div', class_=lambda c: c and c.startswith('ProfileHeader_name')).text.strip()
            if not name:
                raise Exception('no name')
            break
        except errors.LoginRequired:
            raise
        except Exception as e:
            e_ = e
            print(e)
    else:
        raise e_
    return uid, oid, name
def get_soup_session(url, cw=None):
    print_ = get_print(cw)
    session = Session()
    res = clf2.solve(url, session=session, cw=cw)
    print_('{} -> {}'.format(url, res['url']))
    if res['url'].rstrip('/') == 'https://welovemanga.one':
        raise errors.LoginRequired()
    return Soup(res['html']), session
def get(self, url):
    print_ = get_print(self.cw)
    if self._url:
        return self._url
    ydl = ytdl.YoutubeDL(cw=self.cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            # fall back to the extractor's internal API
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        else:
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise

    def print_video(video):
        print_('[{}] [{}] [{}] {}'.format(video['format_id'], video.get('height'), video.get('tbr'), video['url']))

    videos = [video for video in info['formats'] if video.get('height')]
    videos = sorted(videos, key=lambda video: (video.get('height', 0), video.get('tbr', 0)), reverse=True)
    for video in videos:
        print_video(video)

    # pick the best format at or under the user's resolution cap
    for video in videos:
        if video.get('height', 0) <= get_resolution(): #3723
            video_best = video
            break
    else:
        video_best = videos[-1]
    print_video(video_best)

    video = video_best['url']
    ext = get_ext(video)
    self.title = info['title']
    id = info['display_id']

    if ext.lower() == '.m3u8':
        video = M3u8_stream(video, n_thread=4, alter=alter)
        ext = '.mp4'
    self.filename = format_filename(self.title, id, ext)

    self.url_thumb = info['thumbnail']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)
    self._url = video
    return self._url
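# Worked example of the resolution-capping pick above (assumption:
# get_resolution() returns the user's maximum height, e.g. 720). With formats
# sorted best-first by (height, tbr), the loop takes the first format at or
# under the cap and falls back to the smallest one otherwise:
heights = [1080, 720, 480]
cap = 720
best = next((h for h in heights if h <= cap), heights[-1])  # -> 720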
def get_title(soup):
    html = str(soup)
    name = re.find(r'"__isProfile":"Page","name":(".*?")', html) or re.find(
        r'"name":(".*?")', html)
    if not name:
        gc = soup.find('div', id='globalContainer')
        if gc and gc.find('form', id='login_form'):
            raise errors.LoginRequired()
        raise Exception('no name')
    title = json.loads(name) # the captured group is a JSON string literal
    return title
def login_reqd_func(self, *args, **kwargs):
    if isinstance(self, reddit.Reddit):
        user = self.user
        modhash = self.modhash
    else:
        user = self.reddit_session.user
        modhash = self.reddit_session.modhash
    if user is None or modhash is None:
        raise errors.LoginRequired('"%s" requires login.' % func.__name__)
    else:
        return func(self, *args, **kwargs)
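# `login_reqd_func` closes over `func`, so it is meant to be returned from a
# decorator. A minimal sketch of that enclosing decorator (the name
# `require_login` is an assumption, not confirmed by this snippet):
from functools import wraps

def require_login(func):
    @wraps(func)
    def login_reqd_func(self, *args, **kwargs):
        ...  # body as above
    return login_reqd_func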
def get_session(url, cw=None):
    #res = clf2.solve(url, cw=cw)
    #return res['session']
    session = Session()
    sessionid = session.cookies._cookies.get('.instagram.com', {}).get('/', {}).get('sessionid')
    if sessionid is None or sessionid.is_expired():
        raise errors.LoginRequired()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not session.cookies.get('csrftoken', domain='.instagram.com'):
        csrf_token = generate_csrf_token()
        print('csrf:', csrf_token)
        session.cookies.set('csrftoken', csrf_token, domain='.instagram.com')
    return session
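# Hedged sketch of the generate_csrf_token helper assumed above; its real
# implementation is not shown in this snippet. Instagram's csrftoken is an
# opaque alphanumeric string, so a random hex token of the same shape works
# for illustration:
import secrets

def generate_csrf_token(length=32):
    return secrets.token_hex(length // 2)  # 32 hex characters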
def check_error(soup, cw, wait):
    print_ = get_print(cw)
    # a suspiciously short page usually means a login wall
    if len(str(soup.html)) < 1000: #4014
        raise errors.LoginRequired(str(soup.html))
    err = soup.find('div', class_='error-container')
    if err:
        err = err.text.strip()
        if wait:
            print_('err: {}'.format(err))
            sleep(60 * 30, cw)
        else:
            raise Exception(err)
def extract_info(url, cw=None):
    print_ = get_print(cw)
    ydl = ytdl.YoutubeDL(cw=cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            # fall back to the extractor's internal API
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        else:
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise
    return info
def get_albums(page):
    # `uid` and `session` come from the enclosing scope
    url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(
        uid, page, int(time() * 1000))
    referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
    html = downloader.read_html(url, referer, session=session)
    if '<title>新浪通行证</title>' in html: # Sina login page title
        raise errors.LoginRequired()
    j = json.loads(html)
    data = j['data']
    albums = []
    for album in data['album_list']:
        id = album['album_id']
        type = album['type']
        album = Album(id, type)
        albums.append(album)
    return albums
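# The Album container used above is not defined in this snippet; a minimal
# stand-in (assumption) would be:
from collections import namedtuple

Album = namedtuple('Album', ['id', 'type'])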
def call(self, path, qs, default_qs=True):
    if default_qs:
        qs_new = qs
        qs = self._qs.copy()
        qs.update(qs_new)
    url = self._url_base + path
    url = update_url_query(url, qs)
    r = self.session.get(url, headers=self._hdr)
    data = r.json()
    errs = data.get('errors', [])
    if errs:
        code = int(errs[0]['code'])
        if code == 0:
            raise Exception('Not found')
        elif code == 4012:
            raise errors.LoginRequired(errs[0]['detail'])
    r.raise_for_status()
    return data['response']
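# Hedged sketch of the update_url_query helper assumed above (its real
# definition is not shown here): merge `qs` into the URL's existing query
# string, overwriting duplicate keys.
from urllib.parse import urlsplit, urlunsplit, parse_qs, urlencode

def update_url_query(url, qs):
    parts = urlsplit(url)
    query = parse_qs(parts.query)
    query.update({k: v if isinstance(v, list) else [str(v)] for k, v in qs.items()})
    return urlunsplit(parts._replace(query=urlencode(query, doseq=True)))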
def get_imgs(url, title, cw=None):
    print_ = get_print(cw)
    imgs = []
    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        for try_ in range(4):
            try:
                html = downloader.read_html(
                    url, user_agent=downloader.hdr['User-Agent'])
                #sleep(1)
                break
            except Exception as e:
                e_ = e
                print(e)
        else:
            raise e_ # re-raise the last error once retries are exhausted
        soup = Soup(html)
        view = soup.find('div', class_='photos-list')
        if view is None:
            if p == 1:
                raise errors.LoginRequired()
            else:
                break # Guest user
        for img in view.findAll('img'):
            img = Image(img.attrs['data-src'], url, len(imgs))
            imgs.append(img)
        pgn = soup.find('ul', class_='pagination')
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break
        msg = '{} {} ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)
    return imgs
def get_id(url, cw=None):
    for try_ in range(2):
        try:
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                raise errors.LoginRequired()
            m = _get_page_id(html)
            if not m:
                raise Exception('no page_id')
            oid = m.groups()[0]
            uids = re.findall(r'uid=([0-9]+)', html)
            uid = max(set(uids), key=uids.count)
            name = re.findall(r"CONFIG\['onick'\]='(.+?)'", html)[0]
            break
        except errors.LoginRequired:
            raise
        except Exception as e:
            e_ = e
            print(e)
    else:
        raise e_
    return uid, oid, name
def init(self):
    self.session = Session() # 1791
    if 'pornhubpremium.com' in self.url.lower() and \
            not is_login(self.session, self.cw):
        raise errors.LoginRequired()
def checkLogin(session):
    # SUBP is one of weibo.com's session cookies; read it from the cookiejar
    # internals so its expiry can be checked.
    c = session.cookies._cookies.get('.weibo.com', {}).get('/', {}).get('SUBP')
    if not c or c.is_expired():
        raise errors.LoginRequired()
def my_id():
    sid = Session().cookies.get('PHPSESSID', domain='.pixiv.net')
    if not sid:
        raise errors.LoginRequired()
    # pixiv's PHPSESSID starts with the numeric user id, e.g. '12345678_...'
    return re.find(r'^([0-9]+)', sid, err='no userid')
def get_info(url, cw=None, depth=0):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url): # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, cw):
            if data['illustType'] == 2: # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url: # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:
            id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        offset = 0
        while len(ids) < max_pid:
            data = api.bookmarks(id_, offset)
            c = 0
            for id in [work['id'] for work in data['works']]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            offset += LIMIT
            if depth == 0:
                check_alive(cw)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url: # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url) or
            re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p)
            c = 0
            for id in [illust['id'] for illust in data['illustManga']['data'] if 'id' in illust]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url: # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            c = 0
            for id in api.following(p, r18=r18):
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif api.user_id(url): # User illusts
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for illusts in [data['illusts'], data['manga']]:
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    else:
        raise NotImplementedError()

    info['imgs'] = imgs[:max_pid]
    return info
def get_info(url, cw=None, depth=0, tags_add=None):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()] if utils.ui_setting else None
    format_ = compatstr(utils.ui_setting.pixivFormat.currentText()) if utils.ui_setting else 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url): # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])
        if tags_matched(tags_illust, tags_add, cw):
            if data['illustType'] == 2: # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url: # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:
            id_ = my_id()
        if id_ == my_id():
            rests = ['show', 'hide']
        else:
            rests = ['show']
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        for rest in rests:
            offset = 0
            while len(ids) < max_pid:
                data = api.bookmarks(id_, offset, rest=rest)
                c = 0
                for id in [work['id'] for work in data['works']]:
                    if id in ids_set:
                        continue
                    ids_set.add(id)
                    ids.append(id)
                    c += 1
                if not c:
                    break
                offset += LIMIT
                if depth == 0:
                    check_alive(cw)
        process_ids(ids, info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url: # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url) or
            re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        s_mode = qs.get('s_mode', ['s_tag_full'])[0]
        scd = qs.get('scd', [None])[0]
        ecd = qs.get('ecd', [None])[0]
        type_ = qs.get('type', ['all'])[0]
        wlt = qs.get('wlt', [None])[0]
        wgt = qs.get('wgt', [None])[0]
        hlt = qs.get('hlt', [None])[0]
        hgt = qs.get('hgt', [None])[0]
        blt = qs.get('blt', [None])[0]
        bgt = qs.get('bgt', [None])[0]
        ratio = qs.get('ratio', [None])[0]
        tool = qs.get('tool', [None])[0]
        logs = [
            'order: {}'.format(order),
            'mode: {}'.format(mode),
            's_mode: {}'.format(s_mode),
            'scd / ecd: {} / {}'.format(scd, ecd),
            'type: {}'.format(type_),
            'wlt / wgt: {} / {}'.format(wlt, wgt),
            'hlt / hgt: {} / {}'.format(hlt, hgt),
            'blt / bgt: {} / {}'.format(blt, bgt),
            'ratio: {}'.format(ratio),
            'tool: {}'.format(tool),
        ]
        print_('\n'.join(logs))
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p, s_mode=s_mode, scd=scd, ecd=ecd,
                              type_=type_, wlt=wlt, wgt=wgt, hlt=hlt, hgt=hgt,
                              blt=blt, bgt=bgt, ratio=ratio, tool=tool)
            c = 0
            for id in [illust['id'] for illust in data['illustManga']['data'] if 'id' in illust]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url: # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.following(p, r18=r18)
            c = 0
            for id in data['page']['ids']:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif api.user_id(url): # User illusts
        m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
        type_ = {'illustrations': 'illusts', 'manga': 'manga'}.get(m and m.groups()[0])
        if type_:
            types = [type_]
        else:
            types = ['illusts', 'manga']
        if m:
            tag = unquote(m.groups()[1]) or None
        else:
            tag = None
        print_('types: {}, tag: {}'.format(types, tag))
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for type_ in types:
            illusts = data[type_]
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        if not ids:
            raise Exception('no imgs')
        process_ids(ids, info, imgs, cw, depth, tags_add=[tag] if tag else None)
    else:
        raise NotImplementedError()

    info['imgs'] = imgs[:max_pid]
    return info
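# Hedged sketch of the tags_matched helper used in both get_info variants (its
# real definition is not shown here; the older variant also calls it without
# tags_add). Assumed behavior: accept the illust when no tag filter is active,
# otherwise require an intersection with the requested tags.
def tags_matched(tags_illust, tags_add, cw=None):
    tags = set(tags_add or [])
    return not tags or bool(tags & set(tags_illust))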
def get_imgs(username, title, cw=None):
    urls = [
        'https://m.facebook.com/{}/photos'.format(username),
        'https://m.facebook.com/profile.php?id={}&sk=photos'.format(username), # no custom URL
    ]
    for url in urls:
        print('get_imgs url:', url)
        try:
            html = read_html(url)
        except Exception:
            continue
        soup = Soup(html)
        if soup.find('a', id='signup-button'):
            raise errors.LoginRequired()
        photo = soup.find('div', class_='_5v64')
        if photo is not None:
            break
    else:
        raise Exception('No photo div')

    cursor = photo.a.attrs['href'].split('/photos/')[1].split('/')[1]
    print('first cursor:', cursor)
    href = re.find(r'(/photos/pandora/\?album_token=.+?)"', html)
    href = urljoin(url, href)
    href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)
    cursors = set([cursor])

    imgs = []
    dups = {}
    dir = os.path.join(get_outdir('facebook'), title)
    try:
        filenames = os.listdir(dir)
    except Exception:
        filenames = []
    for filename in filenames:
        name, ext = os.path.splitext(filename)
        if name.isdigit():
            dups[int(name)] = os.path.join(dir, filename)

    pages = set()
    while True:
        print(href)
        html = read_html(href)
        data_raw = html.replace('for (;;);', '')
        data = json.loads(data_raw)
        actions = data['payload']['actions']
        for action in actions:
            if action['target'] == 'm_more_photos':
                break
        else:
            print('No more photos')
            break
        html = action['html']
        soup = Soup(html)
        photos = soup.findAll('div', class_='_5v64')
        for photo in photos:
            for a in photo.findAll('a'):
                page = a.attrs['href']
                page = urljoin(href, page)

                # remove duplicate pages
                if page in pages:
                    continue
                pages.add(page)

                img = Image(page)
                id = img.id
                if id in dups and getsize(dups[id]) > 0:
                    print('skip', id)
                    imgs.append(dups[id])
                else:
                    imgs.append(img)

        s = '{} {} - {}'.format(tr_('읽는 중...'), title, len(imgs))
        if cw is not None:
            cw.setTitle(s)
            if not cw.alive:
                return []
        else:
            print(s)

        cursor = re.find(PATTERN_CURSOR, data_raw)
        #print(cursor)
        if cursor is None:
            print('no cursor')
            break
        if cursor in cursors:
            print('same cursor')
            break
        cursors.add(cursor)
        href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)

    return imgs
def wrapped(cls, *args, **kwargs):
    obj = getattr(cls, 'lex_session', cls)
    if login and not obj.is_logged_in():
        raise errors.LoginRequired(function.__name__)
    # assumption: forward to the wrapped function once the login check passes
    return function(cls, *args, **kwargs)
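# `wrapped` closes over `function` and `login`, so it presumably comes from a
# decorator factory. A minimal sketch of that enclosing shape (every name
# besides `wrapped` is an assumption):
import functools

def require_session(login=True):
    def decorator(function):
        @functools.wraps(function)
        def wrapped(cls, *args, **kwargs):
            ...  # body as above
        return wrapped
    return decorator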