async def cb_sticker(event):
    "Get a list of sticker packs matching the given name."
    split = event.pattern_match.group(1)
    if not split:
        return await edit_delete(event, "`Provide some name to search for pack.`", 5)
    catevent = await edit_or_reply(event, "`Searching sticker packs....`")
    scraper = cloudscraper.create_scraper()
    text = scraper.get(combot_stickers_url + split).text
    soup = bs(text, "lxml")
    results = soup.find_all("div", {"class": "sticker-pack__header"})
    if not results:
        return await edit_delete(catevent, "`No results found :(.`", 5)
    reply = f"**Sticker packs found for {split} are :**"
    for pack in results:
        if pack.button:
            packtitle = (pack.find("div", "sticker-pack__title")).get_text()
            packlink = (pack.a).get("href")
            packid = (pack.button).get("data-popup")
            reply += f"\n **• ID: **`{packid}`\n [{packtitle}]({packlink})"
    await catevent.edit(reply)
def create_scrapper_session(useCloudscraper=True, retries=10, backoff_factor=0.3,
                            status_forcelist=(500, 502, 504, 423)):
    session = None
    if useCloudscraper:
        session = cloudscraper.create_scraper()
    else:
        session = Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
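# A minimal, hypothetical usage sketch for create_scrapper_session above. The
# imports mirror the names the factory references (cloudscraper, Session,
# HTTPAdapter, Retry); the URL and retry values are illustrative assumptions.
import cloudscraper
from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = create_scrapper_session(useCloudscraper=True, retries=5, backoff_factor=0.5)
resp = session.get("https://example.com/")  # placeholder URL
print(resp.status_code)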
def test_user_agent(self, **kwargs):
    for browser in ['chrome', 'firefox']:
        scraper = cloudscraper.create_scraper(browser=browser)
        assert browser in scraper.headers['User-Agent'].lower()

    # Check it can't find browsers.json
    with pytest.raises(RuntimeError, match=r".*?User-Agent was not found\."):
        scraper = cloudscraper.create_scraper(browser='bad_match')

    # Check mobile and desktop disabled
    with pytest.raises(
        RuntimeError,
        match=r"Sorry you can't have mobile and desktop disabled at the same time\."
    ):
        scraper = cloudscraper.create_scraper(browser={
            'browser': 'chrome',
            'desktop': False,
            'mobile': False
        })

    # check brotli
    scraper = cloudscraper.create_scraper(browser='chrome', allow_brotli=False)
    assert 'br' not in scraper.headers['Accept-Encoding']

    # test custom User-Agent
    scraper = cloudscraper.create_scraper(browser={'custom': 'test'})
    assert scraper.headers['User-Agent'] == 'test'

    # check it matched chrome and loaded the correct cipherSuite
    scraper = cloudscraper.create_scraper(browser={
        'custom': '50.0.9370.394',
        'tryMatchCustom': True
    })
    assert any('!' not in _ for _ in scraper.user_agent.cipherSuite)

    # check it didn't match anything and loaded the custom cipherSuite
    scraper = cloudscraper.create_scraper(browser={
        'custom': 'aa50.0.9370.394',
        'tryMatchCustom': True
    })
    assert any('!' in _ for _ in scraper.user_agent.cipherSuite)
def query(self):
    """Query the API for subdomains and match them against the target domain."""
    time.sleep(self.delay)
    # Bypass Cloudflare verification
    scraper = cloudscraper.create_scraper()
    scraper.interpreter = 'js2py'
    scraper.proxies = self.get_proxy(self.source)
    url = self.addr + self.domain
    try:
        resp = scraper.get(url, timeout=self.timeout)
    except Exception as e:
        logger.log('ERROR', e.args)
        return
    if not resp:
        return
    subdomains = self.match(self.domain, str(resp.json()))
    # Merge the subdomain search results
    self.subdomains = self.subdomains.union(subdomains)
def __init__(self, profileJson, _found):
    self.profielPropeties = profileJson
    self.found = _found
    if _found:
        self.cSession = cloudscraper.create_scraper()
        self.display_name = profileJson["displayName"]
        self.username = profileJson["username"]
        self.bio = profileJson["aboutMe"]
        self.avatar_url = profileJson["avatarFileName"]
        self.user_id = profileJson["id"]
        self.followersCount = profileJson["followerCount"]
        self.anonymousFollowerCount = profileJson["anonymousFollowerCount"]
        self.isFollowingcount = profileJson["followingCount"]
        self.active = profileJson["isActive"]
        self.tellsCount = profileJson["tellCount"]
        self.tells = []
        self.followers = []
        self.followings = []
    else:
        return
def cb_sticker(update: Update, context: CallbackContext):
    msg = update.effective_message
    split = msg.text.split(" ", 1)
    if len(split) == 1:
        msg.reply_text("Provide some name to search for pack.")
        return
    scraper = cloudscraper.create_scraper()
    text = scraper.get(combot_stickers_url + split[1]).text
    soup = bs(text, "lxml")
    results = soup.find_all("a", {"class": "sticker-pack__btn"})
    titles = soup.find_all("div", "sticker-pack__title")
    if not results:
        msg.reply_text("No results found :(.")
        return
    reply = f"Stickers for *{split[1]}*:"
    for result, title in zip(results, titles):
        link = result["href"]
        reply += f"\n• [{title.get_text()}]({link})"
    msg.reply_text(reply, parse_mode=ParseMode.MARKDOWN)
def linkExtract():
    target = input("[+] Target ( like https://google.com/ ) with http/https \t")
    name = "links"
    scraper = cloudscraper.create_scraper()
    source = scraper.get(target).text
    soup = BeautifulSoup(source, 'html.parser')
    links = []
    for link in soup.find_all(attrs={'href': re.compile("http")}):
        links.append(link.get('href'))
    for link in soup.find_all(attrs={'href': re.compile("https")}):
        links.append(link.get('href'))
    print("\n")
    print(*set(links), sep="\n")
    with open(home + "/webgather/" + name + "-links.txt", 'w+') as f:
        for item in links:
            f.write("%s\n" % item)
    print(Fore.LIGHTMAGENTA_EX +
          "\n [!] Found {0} links (some were duplicates). Results also saved in {1}/webgather/{2}-links.txt \n"
          .format(len(links), home, name))
    input("")
    recreate()
def bgDownloadAssets():
    global config
    scraper = cloudscraper.create_scraper()
    apkPage = BeautifulSoup(
        scraper.get("https://apkpure.com/golf-blitz/com.noodlecake.ssg4/download").text,
        features="html.parser")
    apkLink = apkPage.find("a", id="download_link")['href']
    apkVersion = apkPage.find("span", attrs={"class": "file"}).text
    if "apkVersion" not in config:
        config["apkVersion"] = ""
    if config["apkVersion"] != apkVersion:
        print("downloading new apk for assets")
        apkPath = os.path.join(bot_globals.resources_path, "golfblitz.apk")
        with scraper.get(apkLink, stream=True) as dl:  # requests.get(apkLink, stream=True)
            with open(apkPath, "wb") as f:
                for chunk in dl.iter_content(chunk_size=16384):
                    f.write(chunk)
        with zipfile.ZipFile(apkPath, 'r') as to_unzip:
            to_unzip.extractall(bot_globals.resources_path)
        print("apk has been downloaded and extracted")
        config["apkVersion"] = apkVersion
        json.dump(config, open(os.path.join(confPath, "main-configuration.json"), 'w'))
        bot_globals.update_hats_and_golfers()
def get_lab_urls(date):
    '''Returns all 4 lab urls from poelab.com.
    Returns None for each if the date on poelab doesn't match the provided date
    (i.e. the site has not been updated yet).
    date format: %Y-%m-%d'''
    labpages = []
    ret = []
    scraper = cloudscraper.create_scraper()
    with scraper.get('https://www.poelab.com/') as r:
        etree = lxmlhtml.fromstring(r.text)
        labpages = etree.xpath('//h2/a[@class="redLink"]/@href')
    for url in reversed(labpages[:4]):
        with scraper.get(url) as r:
            etree = lxmlhtml.fromstring(r.text)
            t = etree.xpath('//img[@id="notesImg"]/@src')
            if t:
                t = t[0]
            if date not in t:
                ret.append(None)
            else:
                ret.append(t)
    return ret
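# Hypothetical usage sketch for get_lab_urls above: the date argument is a
# %Y-%m-%d string, so passing today's date asks whether poelab.com has been
# updated yet today. Each entry is an image URL or None.
from datetime import datetime

today = datetime.now().strftime('%Y-%m-%d')
for lab_url in get_lab_urls(today):
    print(lab_url or 'not updated yet')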
def __init__(
    self,
    language="English",
    client_id="KzEZED7aC0vird8jWyHM38mXjNTY",
    client_secret="W9JZoJe00qPvJsiyCGT3CCtC6ZUtdpKpzMbNlUGP",
):
    self.language = language
    self.client_id = client_id
    self.client_secret = client_secret
    self.account = None
    self.access_token = None
    self.refresh_token = None
    # Using cloudscraper over a simple session allows us to
    # get around Cloudflare.
    self.session = cloudscraper.create_scraper()
    self.session.headers.update(HEADERS)
    if self.language:  # pragma: no cover
        self.session.headers.update({"Accept-Language": self.language})
def update_user_agents() -> None:
    if not HAS_CF:
        # The website with the UAs is behind Cloudflare's anti-bot page, we need cloudscraper
        return
    today = datetime.now()
    ua_path = get_homedir() / 'user_agents' / str(today.year) / f'{today.month:02}'
    safe_create_dir(ua_path)
    ua_file_name: Path = ua_path / f'{today.date().isoformat()}.json'
    if ua_file_name.exists():
        # Already have a UA for that day.
        return
    try:
        s = cloudscraper.create_scraper()
        r = s.get('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/')
    except Exception:
        traceback.print_exc()
        return
    to_store = ua_parser(r.text)
    with open(ua_file_name, 'w') as f:
        json.dump(to_store, f, indent=2)
def scrape(research_network: str):
    ids = get_ids(research_network, 2272481)
    data = get_existing_data(research_network)
    completed_ids = set([d["id"] for d in data])
    scraper = cloudscraper.create_scraper()
    for id in tqdm(ids):
        if id in completed_ids:
            continue
        url = f"https://papers.ssrn.com/sol3/papers.cfm?abstract_id={id}"
        response = scraper.get(url)
        save_abstract_data(research_network, {
            "id": id,
            "url": url,
            "error": response.status_code != 200,
            "status_code": response.status_code,
            "html": remove_blank_space(response.text),
        })
def anifo(URL):
    URL = d_url(URL)
    # URL = 'https://www.oploverz.in/series/one-piece-sub-indo/'
    # scraper = cfscrape.create_scraper()
    scraper = cloudscraper.create_scraper()
    soup = BeautifulSoup(scraper.get(URL).content, 'html.parser')
    desc = soup.find('span', class_='desc')
    listinfo = soup.find('div', class_='listinfo')
    img_des = soup.find('div', class_='imgdesc')
    img = img_des.findChildren("img", recursive=False)
    a = desc.prettify(formatter="html5")
    b = listinfo.prettify(formatter="html5")
    c = a.replace('"', "\"")
    d = b.replace('"', "\"")
    e = html.escape(c)
    f = html.escape(d)
    ret = {'desc': e, 'info': f, 'img': img[0]["src"]}
    jsona = json.dumps(ret)
    return jsona
def get_eps_list(URL):
    URL = d_url(URL)
    scraper = cloudscraper.create_scraper()
    soup = BeautifulSoup(scraper.get(URL).content, 'html.parser')
    episodelist = soup.find(class_='episodelist')
    ret = []
    for li in episodelist.find_all("li"):
        eps = li.find(class_="leftoff")
        judul = li.find(class_="lefttitle")
        dt = li.find(class_="rightoff")
        alink = eps.find("a")
        con = {
            'link': alink.get('href'),
            'eps': eps.get_text().strip(),
            'judul': judul.get_text().strip(),
            'date': dt.get_text().strip()
        }
        ret.append(con)
    return json.dumps(ret)
def get_jav_html(url_list: List[Union[int, str]]) -> str:
    """Fetch a javlibrary page, using cloudscraper to get past the Cloudflare check.

    :param url_list: [0]-errorTimes, [1]-url, [2]-proxy
    :return: scraper.text
    """
    scraper = cloudscraper.create_scraper(browser="chrome")
    while url_list[0] != 6:
        try:
            rqs: Response = Response()
            if len(url_list) == 2:
                rqs = scraper.get(url_list[1])
            elif len(url_list) == 3:
                rqs = scraper.get(url_list[1], proxies=url_list[2])
            rqs.encoding = 'utf-8'
            return rqs.text
        except Exception as e:
            sleep(5)
            if url_list[0] == 5:
                raise e
            url_list[0] += 1
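# Hypothetical usage sketch for get_jav_html above. The list protocol is:
# [0] retry counter (start at 0), [1] target URL, optional [2] proxies dict.
# The URL and proxy address below are placeholder assumptions.
page_html = get_jav_html([0, "https://www.javlibrary.com/en/"])
page_html_proxied = get_jav_html([
    0,
    "https://www.javlibrary.com/en/",
    {"https": "http://127.0.0.1:1080"},  # assumed local proxy, adjust as needed
])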
def pixabay_index(request):
    if not request.COOKIES.get('api'):
        return redirect('pixabay_ask_api')
    if request.method == 'GET':
        query = request.GET.get('q')
        if query:
            return render(
                request, 'stock_photos_explorer/pixabay-query.html', {
                    'title': 'Search Result for {} - Pixabay API Explorer'.format(query).title(),
                    'theme': 'light'
                })
        image = bs(
            cloudscraper.create_scraper().get('https://pixabay.com').content,
            'lxml').find('picture').find('source')['srcset']
        return render(request, 'stock_photos_explorer/pixabay-index.html', {
            'title': 'Pixabay API Explorer',
            'image': image
        })
def scrape():
    BASE_URL = "https://www.booksandcoupons.com/"
    course_list = []
    course_dict = {}
    udemy_url = []
    browser = {}
    # Create a cloudscraper session so requests can bypass Cloudflare protection.
    scraper = cloudscraper.create_scraper(browser)
    try:
        r = scraper.get(BASE_URL)  # base url
    except Exception:
        raise ValueError("Fetching site error")
    try:
        # Use BeautifulSoup to parse the HTML and extract the course information.
        soup = BS(r.text, "html.parser")
        header_courses = soup.find("h3", class_="post-title entry-title")
        for i, tag in enumerate(header_courses):
            course_list.insert(1, (tag.text, tag["href"]))
        courses = soup.find_all("h3", class_="post-title entry-title")
        for no, link in enumerate(courses):
            course_link = link.find("a")
            course_list.append((course_link.text, course_link["href"]))
    except Exception as error:
        print(error)
    for i, j in dict(course_list).items():
        course_dict.setdefault(i, j)
    for name, urls in course_dict.items():
        r2 = scraper.get(urls)
        if r2.status_code == 200:
            soup2 = BS(r2.text, "html.parser")
            tags2 = soup2.find_all("a", string="ENROLL NOW")
            if len(tags2) == 1:
                udemy_url.append((current_date, name, tags2[0]["href"]))
            else:
                udemy_url.append((current_date, name, tags2[1]["href"]))
    return udemy_url
def main():
    addseptag()
    print('Loading links and proxies...')
    addseptag()
    data, links, proxy = importLinkProxy()
    proxyList = []
    try:
        proxyList = setupProxyFromUser(proxy)
    except Exception:
        proxyList = setupProxyFromIp(proxy)
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    with open('2captcha.txt') as f:
        apiKey = f.read()
    for i in range(len(links)):
        scraper = cloudscraper.create_scraper(interpreter='nodejs',
                                              recaptcha={
                                                  'provider': '2captcha',
                                                  'api_key': apiKey
                                              })
        try:
            r = scraper.get(links[i], headers=headers, proxies=proxyList[i])
            if r.status_code == 200 or r.status_code == 301:
                print('Activation succeeded')
                status = '0'
                outputToCsvStatus(i, status, data)
            else:
                print('Activation failed')
                status = '-1'
                outputToCsvStatus(i, status, data)
        except Exception:
            print('Activation failed')
            status = '-1'
            outputToCsvStatus(i, status, data)
def __init__(self):
    self._destroyed = False
    self.executor = futures.ThreadPoolExecutor(max_workers=4)

    # Initialize cloudscraper
    try:
        self.scraper = cloudscraper.create_scraper(
            browser={
                'platform': 'linux',
                'mobile': False
            }
        )
    except Exception as err:
        logger.exception('Failed to initialize cloudscraper')
        self.scraper = Session()
    # end try

    # Must resolve these fields inside `read_novel_info`
    self.novel_title = 'N/A'
    self.novel_author = 'N/A'
    self.novel_cover = None
    self.is_rtl = False

    # Each item must contain these keys:
    # `id` - 1 based index of the volume
    # `title` - the volume title (can be ignored)
    self.volumes = []

    # Each item must contain these keys:
    # `id` - 1 based index of the chapter
    # `title` - the title name
    # `volume` - the volume id of this chapter
    # `volume_title` - the volume title (can be ignored)
    # `url` - the link where to download the chapter
    self.chapters = []

    # Other stuff - not necessary to resolve from the crawler instance.
    self.home_url = ''
    self.novel_url = ''
    self.last_visited_url = None
def WaxWalletLogin(login, password, userToken2fa, captcha):
    scraper = cloudscraper.create_scraper()
    data = {
        "password": password,
        "username": login,
        "g-recaptcha-response": captcha,
        "redirectTo": ""
    }
    response = json.loads(
        scraper.post("https://all-access.wax.io/api/session", data).text)
    print(response)
    data = {"code": Get2FA(userToken2fa), "token2fa": response["token2fa"]}
    response = scraper.post("https://all-access.wax.io/api/session/2fa", data)
    print(response.text)
    response = scraper.get("https://all-access.wax.io/api/session")
    print(response.text)
    result = {"token": json.loads(response.text)["token"]}
    print(result)
def xur_weapon():
    scraper = cloudscraper.create_scraper(
        delay=5, recaptcha={'provider': 'return_response'})
    html = get_xur('https://whereisxur.com/')
    soup = BeautifulSoup(html, 'html.parser')

    xur_first_weapon = soup.find('div', class_="et_pb_blurb_0").find(
        'h4', class_='et_pb_module_header').find('span').string
    url_first_weapon = soup.find(
        'div', class_="et_pb_blurb_0").find('noscript').find("img")["src"]
    first_img = scraper.get(url_first_weapon, stream=True)
    with open('xur_img/img_first_weapon.png', 'wb') as f:
        f.write(first_img.content)

    xur_second_weapon = soup.find('div', class_="et_pb_blurb_1").find(
        'h4', class_='et_pb_module_header').find('span').string
    url_second_weapon = soup.find(
        'div', class_="et_pb_blurb_1").find('noscript').find("img")["src"]
    second_img = scraper.get(url_second_weapon, stream=True)
    with open('xur_img/img_second_weapon.png', 'wb') as f:
        f.write(second_img.content)

    xur_third_weapon = soup.find('div', class_="et_pb_blurb_2").find(
        'h4', class_='et_pb_module_header').find('span').string
    url_third_weapon = soup.find(
        'div', class_="et_pb_blurb_2").find('noscript').find("img")["src"]
    third_img = scraper.get(url_third_weapon, stream=True)
    with open('xur_img/img_third_weapon.png', 'wb') as f:
        f.write(third_img.content)

    xur_fourth_weapon = soup.find('div', class_="et_pb_blurb_3").find(
        'h4', class_='et_pb_module_header').find('span').string
    url_fourth_weapon = soup.find(
        'div', class_="et_pb_blurb_3").find('noscript').find("img")["src"]
    fourth_img = scraper.get(url_fourth_weapon, stream=True)
    with open('xur_img/img_fourth_weapon.png', 'wb') as f:
        f.write(fourth_img.content)

    save_xur_weapon(db, xur_first_weapon, xur_second_weapon, xur_third_weapon,
                    xur_fourth_weapon)
def fetch_course_data_by_class_id(self, class_id):
    url = 'https://api.skillshare.com/classes/{}'.format(class_id)
    scraper = cloudscraper.create_scraper(browser={
        'custom': 'Skillshare/4.1.1; Android 5.1.1',
    }, delay=10)
    res = scraper.get(
        url,
        headers={
            'Accept': 'application/vnd.skillshare.class+json;,version=0.8',
            'User-Agent': 'Skillshare/5.3.0; Android 9.0.1',
            'Host': 'api.skillshare.com',
            'Referer': 'https://www.skillshare.com/',
            'cookie': self.cookie,
        })
    if res.status_code != 200:
        raise Exception('Fetch error, code == {}'.format(res.status_code))
    return res.json()
def on_task_input(self, task, config):
    try:
        import cloudscraper
    except ImportError as e:
        logger.debug('Error importing cloudscraper: {}', e)
        raise plugin.DependencyError(
            issued_by='cfscraper',
            missing='cloudscraper',
            message='cloudscraper module required. ImportError: %s' % e,
        )
    scraper = cloudscraper.create_scraper()
    category = config['category']
    persistence = SimplePersistence(plugin='magnetdl')
    last_magnet = persistence.get(category, None)
    logger.debug('last_magnet: {}', last_magnet)
    first_magnet = None
    stop = False
    for page in range(0, config['pages']):
        logger.verbose('Retrieving {} page {}', category, page + 1)
        url = self._url(category, page)
        logger.debug('Url: {}', url)
        try:
            for entry in self.parse_page(scraper, url):
                if first_magnet is None:
                    first_magnet = entry['url']
                    logger.debug('Set first_magnet to {}', first_magnet)
                    persistence[category] = first_magnet
                if last_magnet == entry['url']:
                    logger.debug('Found the page where we left off, stopping')
                    stop = True
                yield entry
        except Page404Error:
            logger.warning('Page {} returned 404, stopping', page)
            return
        if stop:
            return
        time.sleep(random.randint(1, 5))
def json_download(self, chapter_id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate'
    }
    sess = requests.session()
    sess = cloudscraper.create_scraper(sess)
    search_url = "http://www.mangaeden.com/api/manga/{0}/".format(chapter_id)
    connection = sess.get(search_url, headers=headers)
    if connection.status_code != 200:
        print("Whoops! Seems like I can't connect to website.")
        print("It's showing : %s" % connection)
        print("Run this script with the --verbose argument and report the issue along with log file on Github.")
        sys.exit(1)
    else:
        json_data = connection.content
        return json_data
def _get_category(query):
    url = f"https://api.leboncoin.fr/api/parrot/v1/complete?q={query.replace(' ', '%20')}"
    anti_captcha = create_scraper(browser="chrome")
    res = anti_captcha.get(url).json()
    print(type(res))
    if res:
        assert isinstance(
            res, list), f"Unexpected answer received from API: {res!r}"
        return str(res[0]["cat_id"])
    else:
        # No category returned
        return None
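# Hypothetical usage sketch for _get_category above (assumes
# `from cloudscraper import create_scraper` is in scope, since the function
# calls the bare create_scraper name). The query string is only an example.
category_id = _get_category("velo electrique")
print(category_id)  # category id as a string, or None if nothing matched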
def update_user_agents():
    if not HAS_CF:
        # The website with the UAs is behind Cloudflare's anti-bot page, we need cloudscraper
        return
    today = datetime.now()
    ua_path = get_homedir() / 'user_agents' / str(today.year) / f'{today.month:02}'
    safe_create_dir(ua_path)
    ua_file_name: Path = ua_path / f'{today.date().isoformat()}.json'
    if ua_file_name.exists():
        # Already have a UA for that day.
        return
    try:
        s = cloudscraper.create_scraper()
        r = s.get('https://techblog.willshouse.com/2012/01/03/most-common-user-agents/')
    except Exception:
        traceback.print_exc()
        return
    soup = BeautifulSoup(r.text, 'html.parser')
    uas = soup.find_all('textarea')[1].text
    to_store = {'by_frequency': []}
    for ua in json.loads(uas):
        os = ua['system'].split(' ')[-1]
        if os not in to_store:
            to_store[os] = {}
        browser = ' '.join(ua['system'].split(' ')[:-1])
        if browser not in to_store[os]:
            to_store[os][browser] = []
        to_store[os][browser].append(ua['useragent'])
        to_store['by_frequency'].append({
            'os': os,
            'browser': browser,
            'useragent': ua['useragent']
        })
    with open(ua_file_name, 'w') as f:
        json.dump(to_store, f, indent=2)
def update_addon(self, addon_name):
    addon_link = self.config['addons'][addon_name]['link']
    addon_last_update = self.config['addons'][addon_name]['last_update']
    scraper = cloudscraper.create_scraper()
    r = scraper.get(addon_link)
    soup = BeautifulSoup(r.text, features='html.parser')
    addon_name = soup.find("meta", property="og:title")["content"]
    last_update = soup.find('abbr')['title']
    converted_time = self.convert_datetime(last_update.split()[:4])
    if converted_time > addon_last_update:
        self.remove_addon(addon_name)
        download_page = scraper.get(f'{addon_link}/download')
        download_soup = BeautifulSoup(download_page.text, features='html.parser')
        link = download_soup.find('p', {'class': 'text-sm'}).find('a')['href']
        download_link = f'http://www.curseforge.com{link}'
        files = scraper.get(download_link)
        existing_addons = os.listdir(self.addon_path)
        with open(os.path.join(self.addon_path, 'addon.zip'), 'wb') as f:
            f.write(files.content)
        with ZipFile(os.path.join(self.addon_path, 'addon.zip'), 'r') as zipobj:
            zipobj.extractall(self.addon_path)
        os.remove(os.path.join(self.addon_path, 'addon.zip'))
        all_addons = os.listdir(self.addon_path)
        new_files = [x for x in all_addons if x not in existing_addons]
        self.config['addons'][addon_name]['last_update'] = converted_time
        self.config['addons'][addon_name]['files'] = new_files
        self.save_config()
def create_novel(self):
    try:
        print("Initializing...")
        scrapper = cloudscraper.create_scraper()
        page = scrapper.get(self.novel_link)
        soup = BeautifulSoup(page.text, 'html.parser')

        # Get Novel Name
        self.novel_name = soup.find(class_='title').get_text()

        # Get the html that stores links to each chapter
        chapters = soup.find_all(class_='rowChapter')

        # Get all the specified links from the html
        chapter_links = []
        for chapter in chapters:
            chapter_links.append(chapter.find('a').get('href'))
        chapter_links.reverse()  # Reverse the list so the first index will be the first chapter

        print("Starting...")
        book = EpubEngine(self.novel_name, self.storage_path)
        book.addCover(self.storage_path + "/cover.png")
        print("Added Cover")
        current_chapter = 1
        self.download_chapters(current_chapter, scrapper, chapter_links, book)
        book.createEpub()
        self.update_gui('END')
    except Exception as e:
        if 'Missing Node.js' in str(e):
            self.update_gui("NODEJS")
        else:
            print(e)
            self.update_gui('ERROR')
def Scrap_For_TOC(url):
    """scrapes the html for the ToC information"""
    issue = ""
    publication_type = ""
    publication_info = []
    scraper = cloudscraper.create_scraper()
    web_text = scraper.get(url).text
    soup = BeautifulSoup(web_text, features="lxml")
    temp_soup = soup.get_text().replace('\n\n', '')
    temp_soup = ''.join(temp_soup)
    temp_soup = temp_soup.split('\n')
    parsed_soup = []
    start_stop_parsing = 0
    start_stop_pub_info = 0
    # x = 0
    for line in temp_soup:  # soup.get_text():
        # print(line)
        if 'Facebook pageRSS FeedsMost recent' in line:
            start_stop_parsing = 1
            # print("start_stop = 1")
        elif 'ToolsSubmit an Article' in line:
            start_stop_parsing = 0
            # print("start_stop = 0")
        if start_stop_parsing == 1:
            parsed_soup.append(str(line))
            # print(line)
        if 'Select / Deselect allExport Citation(s)Export' in line:
            issue = line
        if 'Open Access' in line:
            start_stop_pub_info = 1
        elif 'Full text' in line:
            start_stop_pub_info = 0
        if start_stop_pub_info == 1 and start_stop_parsing == 1:
            publication_info.append(line)
            # x += 1
    print(issue)
    print(publication_info)
def gethtml(url, req='', headers='', interpreter='nodejs'):
    # session = requests.session()
    # session = cfscrape.create_scraper()
    session = cloudscraper.create_scraper(interpreter=interpreter)
    session.mount('file://', LocalFileAdapter())
    cookies_ = ConfigParser()
    cookies_.read('cookies')
    session.cookies['sess_id'] = cookies_.get('COOKIES', 'sess_id')
    session.cookies['session_id'] = cookies_.get('COOKIES', 'sess_id')
    # lang, lang2, forcesub, forceusa, localizecookies, quality, onlymainsub, connection_n_, proxy_ = config()
    config_ = config()
    if config_['forceusa']:
        session.cookies['sess_id'] = cookies_.get('COOKIES', 'sess_id_usa')
        session.cookies['session_id'] = cookies_.get('COOKIES', 'sess_id_usa')
        del session.cookies['c_visitor']
    if not config_['forceusa'] and config_['localizecookies']:
        session.cookies['c_locale'] = \
            {u'Español (Espana)': 'esES', u'Français (France)': 'frFR',
             u'Português (Brasil)': 'ptBR', u'English': 'enUS',
             u'Español': 'esLA', u'Türkçe': 'enUS', u'Italiano': 'itIT',
             u'العربية': 'arME', u'Deutsch': 'deDE',
             u'Русский': 'ruRU'}[config_['language']]
    if not urlparse(url).scheme and not urlparse(url).netloc:
        print('Apparently not a URL')
        sys.exit()
    if headers == '':
        headers = {
            'Referer': 'http://crunchyroll.com/',
            'Host': 'www.crunchyroll.com',
            'User-Agent': 'Mozilla/5.0 Windows NT 6.1; rv:26.0 Gecko/20100101 Firefox/26.0'
        }
    res = session.get(url, params=req, headers=headers)
    res.encoding = 'UTF-8'
    # print(session.get(url, params=req, headers=headers).url)
    # open('page.html', 'a', encoding='UTF-8').write(res.text)
    return res.text