def __get_cookie(self) -> Session:
    """Build a requests session carrying the novelpia login cookies.

    Reads USERKEY/LOGINKEY from the shared cookie store and copies them
    into a fresh requests.Session. The session is returned either way;
    it simply lacks the cookies when the user is not logged in.
    """
    session = requests.Session()
    # Read both cookies from a single shared Session instead of
    # constructing one throwaway Session per cookie lookup.
    cookies = Session().cookies
    user_key = cookies.get("USERKEY", domain=".novelpia.com")
    login_key = cookies.get("LOGINKEY", domain=".novelpia.com")
    if user_key and login_key:
        session.cookies.set("USERKEY", user_key, domain=".novelpia.com")
        session.cookies.set("LOGINKEY", login_key, domain=".novelpia.com")
    return session
def soup(self):
    """Return the cached soup, fetching it (and opening a session) on first use."""
    if self._soup is not None:
        return self._soup
    self.session = Session()
    self._soup = get_soup(self.url, session=self.session, cw=self.customWidget)
    return self._soup
def init(self):
    """Normalize the URL; for volume URLs (#1799), locate the selected
    volume's list page and preselect the matching page index in the UI."""
    self.url = clean_url(self.url)
    self.session = Session()
    if re.search(PATTERN_ID, self.url): #1799
        # Find the <option> marked 'selected' (options scanned newest-first).
        select = self.soup.find('select', class_='bookselect')
        for i, op in enumerate(select.findAll('option')[::-1]):
            if 'selected' in op.attrs:
                break
        else:
            raise Exception('no selected option')
        # Find the link pointing at the series list page.
        for a in self.soup.findAll('a'):
            url = urljoin(self.url, a.get('href') or '')
            if re.search(PATTERN, url):
                break
        else:
            raise Exception('list not found')
        self.url = self.fix_url(url)
        self._soup = None  # invalidate cache so self.soup refetches the new URL
        # Map the selected volume (op['value']) back to its page index.
        for i, page in enumerate(
                get_pages(self.url, self.session, self.soup)):
            if page.id == int(op['value']):
                break
        else:
            raise Exception('can not find page')
        self.cw.range_p = [i]
def enter():
    """Pass the site's content-filter gate and return the primed session.

    Loads the filter page, collects the last (strictest) value of every
    <select>, the rating checkboxes and the CSRF token, then posts the
    filter form so subsequent requests see unfiltered listings.
    """
    print('enter')
    session = Session()
    r = session.get(URL_ENTER) # 862
    html = r.text
    soup = Soup(html)
    box = soup.find('aside', id='FilterBox')
    data = {}
    for select in box.findAll('select'):
        name = select.attrs['name']
        value = select.findAll('option')[-1].attrs['value']
        print(name, value)
        data[name] = value
    # 'inp' rather than 'input' to avoid shadowing the builtin
    for inp in box.findAll('input'):
        name = inp.attrs['name']
        value = inp.attrs['value']
        if name.startswith('rating_') or 'CSRF_TOKEN' in name:
            print(name, value)
            data[name] = value
    data.update({
        'filter_media': 'A',
        'filter_order': 'date_new',
        'filter_type': '0',
        })
    r = session.post(URL_FILTER, data=data, headers={'Referer': r.url})
    print(r)
    return session
def get_id(url, cw=None):
    """Extract the artstation user id (optionally 'id/type') from *url*.

    Handles artwork pages (resolved via the artwork's owner), subdomain
    URLs, path URLs and bare prefixed ids.
    """
    print_ = get_print(cw)
    url = url.split('?')[0].split('#')[0]
    if '/artwork/' in url:
        # Single artwork: resolve the owner through the artwork page.
        id_art = get_id_art(url)
        imgs = get_imgs_page(id_art, session=Session(), cw=cw)
        return imgs[0].data['user']['username']
    # id_/type_ instead of id/type: avoid shadowing builtins
    if '.artstation.' in url and 'www.artstation.' not in url:
        # Subdomain style: https://<id>.artstation.com/...
        id_ = url.split('.artstation')[0].split('//')[-1]
        type_ = None
    elif 'artstation.com' in url:
        paths = url.split('artstation.com/')[1].split('/')
        id_ = paths[0]
        type_ = paths[1] if len(paths) > 1 else None
    else:
        # Bare id, possibly prefixed by the downloader.
        # (dropped the original's no-op .replace('/', '/'))
        id_ = url.replace('artstation_', '')
        type_ = None
    if type_ not in [None, 'likes']:
        type_ = None
    print_('type: {}, id: {}'.format(type_, id_))
    if type_:
        return '{}/{}'.format(id_, type_)
    return id_
def init(self):
    """Normalize the lhscan domain and verify the URL points to a manga list."""
    self.url = self.url.replace('lhscan.net', 'loveheaven.net')
    self.session = Session()
    #clf2.solve(self.url, session=self.session, cw=self.cw)
    soup = self.soup
    if not soup.find('ul', class_='manga-info'):
        msg = u'{}: {}'.format(tr_(u'목록 주소를 입력해주세요'), self.url)
        self.Invalid(msg)
def get_session():
    """Create a session that already carries the syosetu over-18 consent cookie."""
    s = Session()
    s.cookies.set(
        name='over18', value='yes',
        domain='.syosetu.com', path='/')
    return s
def get_soup(url, session=None):
    """Fetch *url* through clf2 and return (session, soup, final_url)."""
    if session is None:
        session = Session()
    res = clf2.solve(url, session=session)
    page = Soup(res['html'], apply_css=True)
    return session, page, res['url']
def init(self):
    """Reject single-post URLs, point at the blog root and warm up the session."""
    if u'bdsmlr.com/post/' in self.url:
        msg = tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url)
        raise errors.Invalid(msg)
    self.session = Session()
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    clf2.solve(self.url, session=self.session, cw=self.cw)
def get_soup_session(url, cw=None):
    """Solve *url* via clf2 and return (soup, session).

    Raises LoginRequired when the site redirects to its root page.
    """
    print_ = get_print(cw)
    session = Session()
    res = clf2.solve(url, session=session, cw=cw)
    url_final = res['url']
    print_('{} -> {}'.format(url, url_final))
    if url_final.rstrip('/') == 'https://welovemanga.one':
        raise errors.LoginRequired()
    return Soup(res['html']), session
def read(self):
    """Resolve the video, then queue its URL, thumbnail and title."""
    session = Session()
    video = get_video(self.url, session, self.cw)
    self.title = video.title
    self.urls.append(video.url)
    self.setIcon(video.thumb)
def get_videos(url, cw=None):
    """Collect all clips of a Twitch user via the GQL ClipsCards__User query.

    Returns an info dict with 'name' (broadcaster display name) and
    'videos' sorted by clip id, newest first. Raises when no clips exist.
    """
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')
    print(user_id)
    session = Session()
    r = session.get(url)
    # Scrape the GQL request headers (incl. client id) out of the page script.
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))
    print(s)
    headers = json_loads(s)
    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
                }
            },
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
                }
            },
        }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        # Page through the clip list with the GQL cursor.
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql', json=payload, headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            # No progress — stop to avoid looping forever.
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info
def init(self):
    """Strip the download prefix, reject single posts, then open the blog root."""
    self.url = self.url.replace('bdsmlr_', '')
    if u'bdsmlr.com/post/' in self.url:
        msg = tr_(u'개별 다운로드는 지원하지 않습니다: {}').format(self.url)
        return self.Invalid(msg, fail=False)
    self.url = 'https://{}.bdsmlr.com'.format(self.id_)
    self.session = Session()
    clf2.solve(self.url, session=self.session, cw=self.customWidget)
def init(self):
    """Build the main user-page URL from the stored id and prepare a session."""
    # Dropped the original's no-op .replace('/', '/').
    self.url_main = 'https://www.artstation.com/{}'.format(
        self.id.replace('artstation_', '', 1))
    if '/artwork/' in self.url:
        pass#raise NotImplementedError('Single post')
    else:
        self.url = self.url_main
    self.session = Session()
async def get_current_user(self) -> Session:
    """Instantiate the session and load the user's session data.

    Override this method if you do not want to use the built-in
    session implementation.
    """
    if not self.get_secure_cookie('session_id'):
        self.set_secure_cookie('session_id', uuid4().hex)
    self.Session = Session.Session(self)
    await self.Session.get_data()
    return self.Session.data
def read(self):
    """Read the preferred format from settings, then queue the video."""
    fmt = compatstr(
        self.ui_setting.youtubeFormat.currentText()).lower().strip()
    session = Session()
    video = get_video(self.url, session, fmt)
    self.title = video.title
    self.urls.append(video.url)
    self.setIcon(video.thumb)
def real_url(url, session=None, cw=None):
    """Resolve *url* through clf2 and return the (possibly redirected) URL.

    When the solved URL differs from the input, the original path is
    re-attached onto the redirected domain.
    """
    print_ = get_print(cw)
    if session is None:
        session = Session()
    data = clf2.solve(url, session=session, cw=cw)
    url_new = data['url']
    print('url_new:', url_new)
    if url_new != url:
        # Keep the original path, but on the new domain.
        url_new = urljoin(url_new, '/' + u'/'.join(url.split('/')[3:]))
        # print_(u'[redirect domain] {} -> {}'.format(url, url_new))
    return url_new
def get_video(url, session=None):
    """Extract a hanime.tv video.

    Parses window.__NUXT__ from the page for the video slug, queries the
    manifests API with a synthesized signature, filters out deprecated
    streams and returns (Video, session) built from the first usable one.
    """
    if session is None:
        session = Session()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    session.headers['X-Directive'] = 'api'
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    # The video metadata lives in the inline Nuxt state blob.
    for script in soup.findAll('script'):
        script = script.text or script.string or ''
        data = re.find('window.__NUXT__=(.+)', script)
        if data is not None:
            data = data.strip()
            if data.endswith(';'):
                data = data[:-1]
            data = json.loads(data)
            break
    else:
        raise Exception('No __NUXT__')
    info = data['state']['data']['video']['hentai_video']
    query = info['slug']
    #url_api = 'https://members.hanime.tv/api/v3/videos_manifests/{}?'.format(query) # old
    url_api = 'https://hanime.tv/rapi/v7/videos_manifests/{}?'.format(
        query) # new
    print(url_api)
    # Random hex signature; presumably only the format is checked — TODO confirm.
    hdr = {
        'x-signature': ''.join('{:x}'.format(randrange(16)) for i in range(32)),
        'x-signature-version': 'web2',
        'x-time': str(int(time())),
        }
    r = session.get(url_api, headers=hdr)
    print(r)
    data = json.loads(r.text)
    # Flatten streams from every server, then drop dead/deprecated ones.
    streams = []
    for server in data['videos_manifest']['servers']:
        streams += server['streams']
    streams_good = []
    for stream in streams:
        url_video = stream['url']
        if not url_video or 'deprecated.' in url_video:
            continue
        streams_good.append(stream)
    if not streams_good:
        raise Exception('No video available')
    print('len(streams_good):', len(streams_good))
    for stream in streams_good:
        print(stream['extension'], stream['width'],
              stream['filesize_mbs'], stream['url'])
    stream = streams_good[0]
    return Video(info, stream), session
def init(self):
    """Solve the page; if a captcha is detected, reopen with the browser
    shown until the captcha form is gone."""
    cw = self.cw
    self.session = Session()
    res = clf2.solve(self.url, self.session, cw)
    if is_captcha(Soup(res['html'])):
        clf2.solve(self.url, self.session, cw, show=True,
                   f=lambda html: not is_captcha(Soup(html)))
def set_session(self, user):
    """Create a session for *user*, register it and queue its Set-Cookie header."""
    sess_id = str(uuid4())
    self.sessions[sess_id] = Session(
        username=user, sess_id=sess_id, created=datetime.now())
    cookie = Cookie.SimpleCookie()
    cookie['session_id'] = sess_id
    cookie['session_id']['path'] = "/"
    self.default_header.append(
        ('Set-Cookie', cookie.output(header='').strip()))
def read_channel(url_page, cw=None):
    """Read a channel's video list via the site's JSON pagination API.

    Collects unique video URLs (up to the configured max) plus the channel
    display name; returns an info dict with 'header', 'username', 'name'
    and 'urls'. Raises when nothing is found.
    """
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    ids = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)
        videos = data.get('videos') #4530
        if not videos:
            print_('empty')
            break
        for video in videos:
            id_ = video['id']
            if id_ in ids:
                # Pages can overlap; skip videos already collected.
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))
        if len(urls) >= max_pid:
            break
        n = data['nb_videos']
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            # Collected everything the server reports to exist.
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
def get_session(self):
    """Return True iff the request carries a valid, unexpired session cookie.

    Sessions older than two hours are evicted. Any parsing error is logged
    and treated as "no session".
    """
    try:
        session_cookie = Cookie.SimpleCookie(
            self.environ.get('HTTP_COOKIE', ""))
        session_morsel = session_cookie.get('session_id', Cookie.Morsel())
        # Fall back to an epoch-dated session so the freshness check
        # below always fails for unknown ids.
        current_session = self.sessions.get(
            session_morsel.value, Session(created=datetime(1970, 1, 1)))
        if current_session.created < (datetime.now() - timedelta(hours=2)):
            self.sessions.pop(current_session.sess_id, None)
        return bool(self.sessions.get(session_morsel.value, False))
    except Exception:  # was a bare except:, which also swallowed SystemExit
        sys.stderr.write(traceback.format_exc())
        return False
def get_session(url, cw=None):
    """Build an Instagram session from stored cookies; require a login."""
    #res = clf2.solve(url, cw=cw)
    #return res['session']
    session = Session()
    jar = session.cookies._cookies.get('.instagram.com', {})
    sessionid = jar.get('/', {}).get('sessionid')
    if sessionid is None or sessionid.is_expired():
        raise errors.LoginRequired()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not session.cookies.get('csrftoken', domain='.instagram.com'):
        csrf_token = generate_csrf_token()
        print('csrf:', csrf_token)
        session.cookies.set("csrftoken", csrf_token, domain='.instagram.com')
    return session
def init(self):
    """Expand shorthand ids into full pornhub URLs; demand login cookies
    for premium pages."""
    self.session = Session() # 1791
    url = self.url
    if 'pornhub_gif_' in url:
        self.url = 'https://www.pornhub.com/gif/{}'.format(
            url.replace('pornhub_gif_', ''))
    elif 'pornhub_album_' in url:
        self.url = 'https://www.pornhub.com/album/{}'.format(
            url.replace('pornhub_album_', ''))
    elif 'pornhub_' in url:
        self.url = ('https://www.pornhub.com/view_video.php?viewkey={}'
                    .format(url.replace('pornhub_', '')))
    premium = 'pornhubpremium.com' in self.url.lower()
    if premium and not is_login(self.session, self.cw):
        return self.Invalid('[Pornhub] Login cookies required')
def read(self):
    """Fetch video info, queue the first video and set its thumbnail/title."""
    self.session = Session()
    self.session.cookies.set('_ac', '1', domain='.video.fc2.com')
    info = get_info(self.url, self.session, self.cw)
    video = info['videos'][0]
    self.urls.append(video.url)
    thumb = BytesIO()
    downloader.download(video.url_thumb, referer=self.url, buffer=thumb)
    self.setIcon(thumb)
    self.title = info['title']
def get(self, url):
    """Resolve and cache the direct video URL for a pandora.tv page.

    Also fills in title, filename and thumbnail as side effects.
    Raises EmbedUrlError for embedded (external) videos.
    """
    if self._url_video:
        return self._url_video
    cw = self.cw
    print_ = get_print(cw)
    html = downloader.read_html(url)
    soup = Soup(html)
    embedUrl = extract('embedUrl', html, cw)
    if embedUrl:
        raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))
    # Pull the player parameters out of the page scripts.
    uid = extract('strLocalChUserId', html, cw)
    pid = extract('nLocalPrgId', html, cw)
    fid = extract('strFid', html, cw)
    resolType = extract('strResolType', html, cw)
    resolArr = extract('strResolArr', html, cw)
    vodSvr = extract('nVodSvr', html, cw)
    resols = extract('nInfo', html, cw)
    runtime = extract('runtime', html, cw)
    url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
    data = {
        'userId': uid,
        'prgId': pid,
        'fid': fid,
        'resolType': resolType,
        'resolArr': ','.join(map(str, resolArr)),
        'vodSvr': vodSvr,
        'resol': max(resols),  # request the highest available resolution
        'runtime': runtime,
        'tvbox': 'false',
        'defResol': 'true',
        'embed': 'false',
        }
    session = Session()
    r = session.post(url_api, headers={'Referer': url}, data=data)
    data = json.loads(r.text)
    self._url_video = data['src']
    self.title = soup.find('meta', {'property': 'og:description'})['content']
    ext = get_ext(self._url_video)
    self.filename = format_filename(self.title, pid, ext)
    self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)
    return self._url_video
def read_channel(url_page, cw=None):
    """Scrape a channel's video URLs from the HTML pagination endpoint.

    Unlike the JSON variant, this posts 'main_cats=false' and parses the
    thumb blocks. Returns an info dict with 'header', 'username', 'name'
    and 'urls'; returns None early if the widget dies mid-read.
    """
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw, 2000)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    urls_set = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print(url_api)
        r = session.post(url_api, data='main_cats=false')
        soup = Soup(r.text)
        thumbs = soup.findAll('div', class_='thumb-block')
        if not thumbs:
            print_('empty')
            break
        for thumb in thumbs:
            info['name'] = thumb.find('span', class_='name').text.strip()
            href = thumb.find('a')['href']
            href = urljoin(url_page, href)
            if href in urls_set:
                # Pages can overlap; skip videos already collected.
                print_('duplicate: {}'.format(href))
                continue
            urls_set.add(href)
            urls.append(href)
        if len(urls) >= max_pid:
            break
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(s)
        else:
            print(s)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
def get_soup(url, session=None):
    """Solve *url* (showing the browser while a captcha form is present)
    and return (session, soup, final_url)."""
    if session is None:
        session = Session()

    def _check(html, browser=None):
        # Keep the browser visible until the captcha form disappears. #4660
        if Soup(html).find('form', {'name': 'fcaptcha'}):
            browser.show()
            return False
        browser.hide()
        return True

    res = clf2.solve(url, session=session, f=_check)
    return session, Soup(res['html'], apply_css=True), res['url']
def init(self):
    """Detect the sankaku subdomain, clean the URL and log in when needed."""
    sub = self.url.split('sankakucomplex.com')[0].split(
        '//')[-1].strip('.').split('.')[-1]
    if sub == '':
        sub = 'www'
    if sub not in ['chan', 'idol', 'www']:
        raise Exception('Not supported subdomain')
    self.type_sankaku = sub
    self.url = clean_url(self.url.replace('&commit=Search', ''))
    self.session = Session()
    if self.type_sankaku != 'www':
        login(sub, self.session, self.cw)
    else:
        html = downloader.read_html(self.url, session=self.session)
        self.soup = Soup(html)
def init(self):
    """Normalize a sankaku URL or a '[sub] tags' search shorthand.

    Determines the subdomain (chan/idol/www), rebuilds a search URL from
    tag input when needed, logs in for non-www subdomains and pre-fetches
    the soup for www.
    """
    self.url = self.url.replace('sankaku_', '')
    if '/post/' in self.url:
        return self.Invalid('Single post is not supported')
    if 'sankakucomplex.com' in self.url:
        self.url = self.url.replace('http://', 'https://')
        type = self.url.split('sankakucomplex.com')[0].split(
            '//')[-1].strip('.').split('.')[-1]
        if type == '':
            type = 'www'
        if type not in ['chan', 'idol', 'www']:
            raise Exception('Not supported subdomain')
    else:
        # Tag-search shorthand, e.g. '[chan] tag1 tag2'.
        url = self.url
        url = url.replace(' ', '+')
        while '++' in url:
            url = url.replace('++', '+')
        url = urllib.quote(url)
        url = url.replace('%2B', '+')
        url = url.replace('%20', '+') #
        # NOTE(review): quote() runs before the '[chan]'/'[idol]' prefix
        # checks — verify the brackets survive quoting as expected.
        if url.startswith('[chan]'):
            type = 'chan'
            url = url.replace('[chan]', '', 1).strip()
        elif url.startswith('[idol]'):
            type = 'idol'
            url = url.replace('[idol]', '', 1).strip()
        elif url.startswith('[www]'):
            type = 'www'
            url = url.replace('[www]', '', 1).strip()
        else:
            raise Exception('Not supported subdomain')
        self.url = u'https://{}.sankakucomplex.com/?tags={}'.format(
            type, url)
    self.type_sankaku = type
    self.url = self.url.replace('&commit=Search', '')
    self.url = clean_url(self.url)
    self.session = Session()
    if self.type_sankaku != 'www':
        login(type, self.session, self.customWidget)
    if self.type_sankaku == 'www':
        html = downloader.read_html(self.url, session=self.session)
        self.soup = Soup(html)