def bsGetAllTokens(account_address):
    if not w3.isAddress(account_address):
        raise KeyError('Invalid address!')
    url = f'https://etherscan.io/address/{account_address}'
    scraper = cloudscraper.CloudScraper()
    response = scraper.get(url)
    soup = bs(response.text, 'html.parser')
    if not soup.find(id="ContentPlaceHolder1_tokenbalance"):
        raise ValueError('No tokens found on this account!')
    token_list = soup.find("ul", {"class": "list list-unstyled mb-0"})
    tokens = token_list.findAll(
        "span",
        {"class": "list-amount link-hover__item hash-tag hash-tag--md text-truncate"})
    result = []
    for token in tokens:
        token_obj = token.text.split(maxsplit=1)
        token_obj[0] = Decimal(token_obj[0].replace(',', ''))
        result.append(token_obj)
    with open('token_list.txt', 'w') as f:
        s = ''
        for token in result:
            s += f'{token[0]} of {token[1]}\n'
        f.write(s)
    return result
def bsGetTokenBalanceAtTime(balance_date, account_address, contract_address):
    # Checking if the account address is okay
    if not w3.isAddress(account_address):
        raise KeyError('Invalid account address!')
    # Calling the URL for the given address to check if it's valid
    url = f'https://api.etherscan.io/api?module=contract&action=getabi&address={contract_address}&apikey={apiToken}'
    response = requests.get(url).json()
    if response['status'] != "1":
        raise ConnectionError('Invalid contract!')
    # Use this if statement and date conversion outside of Django
    # balance_date = datetime.strptime(balance_date, '%Y-%m-%d')
    # if balance_date > datetime.now():
    # Use this for Django
    if balance_date > date.today():
        raise ValueError('Date is in the future!')
    dt = correctDateFormating(balance_date)
    scraper = cloudscraper.CloudScraper()  # Bypassing Cloudflare
    # Populating required parameters for form submission
    rp = scraper.get('https://etherscan.io/tokencheck-tool')
    sp = bs(rp.text, 'html.parser')
    event_target = sp.find(id="__EVENTTARGET").get('value')
    event_argument = sp.find(id="__EVENTARGUMENT").get('value')
    view_state = sp.find(id="__VIEWSTATE").get('value')
    view_state_gen = sp.find(id="__VIEWSTATEGENERATOR").get('value')
    event_validation = sp.find(id="__EVENTVALIDATION").get('value')
    params = {
        '__EVENTTARGET': event_target,
        '__EVENTARGUMENT': event_argument,
        '__VIEWSTATE': view_state,
        '__VIEWSTATEGENERATOR': view_state_gen,
        '__EVENTVALIDATION': event_validation,
        'ctl00$ContentPlaceHolder1$tokenbalance': 'tokenbalance',
        'ctl00$ContentPlaceHolder1$txtAccount': account_address,
        'ctl00$ContentPlaceHolder1$txtAddress': contract_address,
        'date': dt,
        'ctl00$ContentPlaceHolder1$txtBlockNo': '',
        'ctl00$ContentPlaceHolder1$Button1': 'Lookup'
    }
    # Scraping the response for the token balance
    response = scraper.post('https://etherscan.io/tokencheck-tool', data=params)
    soup = bs(response.text, 'html.parser')
    # Taking the token info from the soup
    token = soup.findAll("span", {"class": "text-size-1"})[2].text.split()
    token_balance = Decimal(token[0].replace(',', ''))
    token_name = token[1]
    result_dict = {}
    result_dict['token_name'] = token_name
    result_dict['token_balance'] = token_balance
    return result_dict
def make_sesssion(self):
    logger.debug("initializing cloudscraper")
    return cloudscraper.CloudScraper(browser={
        'browser': 'chrome',
        'platform': 'windows',
        'mobile': False,
        'desktop': True,
    })
def __init__(self, headers, TOKEN):
    self.scraper = cloudscraper.CloudScraper()
    self.headers = {}
    self.TOKEN = TOKEN
    self.headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0"
    self.headers['Cookie'] = headers
    self.SEC = self.getSEC()
    self.JavaSoftwares = ['Vanilla', 'Spigot', 'Forge', 'Magma', 'Snapshot',
                          'Bukkit', 'Paper', 'Modpacks', 'Glowstone']
    self.BedrockSoftwares = ['Bedrock', 'Pocketmine-MP']
def __init__(self, threadNum=5):
    self.plugins = load_plugins()
    self.threadNum = threadNum
    self.items = Queue()
    self.tempdata = {}
    self.images = {}
    self.session = cloudscraper.CloudScraper()
    self.block_size = 1024  # 1 KB chunk size for streamed downloads
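A `block_size` like this is usually fed into chunked streaming of the response body. The sketch below shows that pattern under stated assumptions: the `save_file` helper, `url`, and `path` are hypothetical names not in the original, and it relies on `CloudScraper` responses exposing the standard `requests` streaming API (which they do, since `CloudScraper` subclasses `requests.Session`).

# Hypothetical helper illustrating how block_size is typically consumed.
def save_file(self, url, path):
    response = self.session.get(url, stream=True)
    response.raise_for_status()
    with open(path, 'wb') as f:
        # iter_content yields the body in block_size-byte chunks (1024 bytes = 1 KB)
        for chunk in response.iter_content(chunk_size=self.block_size):
            f.write(chunk)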
def __init__(self, username, password, twofac=None, cookie=None, config=None):
    """
    The bumper object itself.
    If username/password are left empty but a cookie is provided, it will log in
    with that instead. Within this __init__ function the logger and cloudflare
    scraper will be started, and the program will check if the login details are
    correct or not.

    :param username: OGU account name
    :param password: OGU password
    :param twofac: current 2FA code
    :param cookie: `ogusersbbuser` cookie
    :return: none
    """
    self.logger = logging.getLogger(__name__)
    self.logger.debug("Initializing data")
    self.__data = Data()
    if config:
        self.logger.debug(
            f"Loading config from argument with type '{type(config)}'")
        self.config = config
    else:
        self.logger.warning("Make sure to set a config before continuing")
    self.logger.debug("Initializing the session")
    session_data = {"browser": {"browser": "chrome", "desktop": True}}
    if self.__config.get("captcha"):
        self.logger.debug("Loading CAPTCHA login info")
        session_data["recaptcha"] = self.__config["captcha"]
    self.session = cloudscraper.CloudScraper(**session_data)
    if cookie:
        self.logger.debug("Loading user from cookie")
        self.session.cookies["ogusersmybbuser"] = cookie
        if not self.logged_in:
            raise InvalidUser("Incorrect login details", username, password)
    else:
        self.logger.debug("Loading user from username and password")
        if not self.login(username, password, twofac=twofac):
            raise InvalidUser("Incorrect login details", username, password)
    self.logger.info("Initialized the bumper successfully")
def run(self):
    Analyzer.run(self)
    if self.data_type == 'ip':
        try:
            data = self.get_data()
            scraper = cloudscraper.CloudScraper()
            headers = {
                'Host': 'talosintelligence.com',
                'Referer': 'https://talosintelligence.com/reputation_center/lookup?search={}'.format(data),
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
                'Accept': 'application/json'
            }
            response_details = scraper.get(
                'https://talosintelligence.com/sb_api/query_lookup',
                headers=headers,
                params={
                    'query': '/api/v2/details/ip/',
                    'query_entry': data
                })
            response_location = scraper.get(
                'https://talosintelligence.com/sb_api/query_lookup',
                headers=headers,
                params={
                    'query': '/api/v2/location/ip/',
                    'query_entry': data
                })
            if response_details.status_code in (200, 201):
                if response_location.status_code in (200, 201):
                    result = response_details.json()
                    result['country'] = response_location.json().get(
                        'country', None)
                    self.report(result if len(result) > 0 else {})
                else:
                    self.error(
                        'Failed to query Talos location. Status_code {}'.format(
                            response_location.status_code))
            else:
                self.error(
                    'Failed to query Talos details. Status_code {}'.format(
                        response_details.status_code))
        except Exception as e:
            self.unexpectedError(e)
    else:
        self.notSupported()
def EnableCloudscraper(self, enable: bool = True):
    ##
    #
    # Enables/disables the cloudscraper. Resets the session.
    #
    # @param enable Enable the cloudscraper?
    #
    ##
    self._session = cloudscraper.CloudScraper() if enable else Session()
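A minimal, self-contained sketch of how this toggle behaves; the `Downloader` class name and its `__init__` are assumptions for illustration, not the original owning class.

import cloudscraper
from requests import Session

class Downloader:  # hypothetical owning class
    def __init__(self):
        self._session = Session()

    def EnableCloudscraper(self, enable: bool = True):
        # Swap between a Cloudflare-aware scraper and a plain requests session
        self._session = cloudscraper.CloudScraper() if enable else Session()

downloader = Downloader()
downloader.EnableCloudscraper(True)    # subsequent requests go through cloudscraper
downloader.EnableCloudscraper(False)   # back to a plain requests.Session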
def cloudScraperRequest(url, method, **kwargs):
    headers = kwargs.pop('headers', {})
    cookies = kwargs.pop('cookies', {})
    params = kwargs.pop('params', {})
    global scraper
    if scraper is None:
        scraper = cloudscraper.CloudScraper()
    scraper.headers.update(headers)
    scraper.cookies.update(cookies)
    req = scraper.request(method, url, data=params)
    return req
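A sketch of how the module-level `scraper` and this wrapper might be wired together; the `scraper = None` global and the example URL/payload are assumptions based on the function body, not part of the original module.

import cloudscraper

scraper = None  # reused across calls so Cloudflare clearance cookies persist

# Example call: the wrapper sends `params` as the request body (data=) and
# merges the headers into the shared scraper session.
resp = cloudScraperRequest(
    'https://example.com/search',
    'POST',
    headers={'Referer': 'https://example.com/'},
    params={'q': 'test'},
)
print(resp.status_code)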
def _connect(self):
    self.session = cloudscraper.CloudScraper()
    self.session.cookies = http.cookiejar.LWPCookieJar(self.cookie_jar)
    if os.path.isfile(self.cookie_jar):
        self.session.cookies.load(ignore_discard=True, ignore_expires=True)
    response = self.session.get(
        "https://connect.garmin.com/modern/settings", allow_redirects=False)
    if response.status_code != 200:
        self._LOG.info("Authenticate user '%s'", self.username)
        self._authenticate()
    else:
        self._LOG.info("User '%s' already authenticated", self.username)
def __init__(self, email, password):
    """Create a new class instance."""
    self.username = email
    self.password = password
    self.session = cloudscraper.CloudScraper()
    self.sso_rest_client = ApiClient(
        self.session, "sso.garmin.com/sso", aditional_headers=self.garmin_headers)
    self.modern_rest_client = ApiClient(
        self.session,
        "connect.garmin.com/modern",
        aditional_headers=self.garmin_headers,
    )
    self.display_name = None
def __init__(self, email, password, is_cn=False):
    """Init module."""
    global BASE_URL
    global SSO_URL
    global SIGNIN_URL
    self.email = email
    self.password = password
    self.req = cloudscraper.CloudScraper()
    self.logger = logging.getLogger(__name__)
    self.display_name = ""
    self.full_name = ""
    self.unit_system = ""
    self.is_cn = is_cn
    if is_cn:
        BASE_URL = BASE_URL.replace(".com", ".cn")
        SSO_URL = SSO_URL.replace(".com", ".cn")
        SIGNIN_URL = SIGNIN_URL.replace(".com", ".cn")
    self.url_user_summary = BASE_URL + '/proxy/usersummary-service/usersummary/daily/'
    self.url_user_summary_chart = BASE_URL + '/proxy/wellness-service/wellness/dailySummaryChart/'
    self.url_heartrates = BASE_URL + '/proxy/wellness-service/wellness/dailyHeartRate/'
    self.url_sleepdata = BASE_URL + '/proxy/wellness-service/wellness/dailySleepData/'
    self.url_body_composition = BASE_URL + '/proxy/weight-service/weight/daterangesnapshot'
    self.url_activities = BASE_URL + '/proxy/activitylist-service/activities/search/activities'
    self.url_hydrationdata = BASE_URL + '/proxy/usersummary-service/usersummary/hydration/daily/'
    self.url_activity = BASE_URL + '/proxy/activity-service/activity/'
    self.url_personal_record = BASE_URL + '/proxy/personalrecord-service/personalrecord/'
    self.url_tcx_download = BASE_URL + "/proxy/download-service/export/tcx/activity/"
    self.url_gpx_download = BASE_URL + "/proxy/download-service/export/gpx/activity/"
    self.url_kml_download = BASE_URL + "/proxy/download-service/export/kml/activity/"
    self.url_fit_download = BASE_URL + "/proxy/download-service/files/activity/"
    self.url_csv_download = BASE_URL + "/proxy/download-service/export/csv/activity/"
    self.url_device_list = BASE_URL + '/proxy/device-service/deviceregistration/devices'
    self.url_device_service = BASE_URL + '/proxy/device-service/deviceservice/'
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
        'origin': 'https://sso.garmin.com' if not self.is_cn else "https://sso.garmin.cn"
    }
def cloudScraperRequest(url, method, **kwargs):
    headers = kwargs.pop('headers', {})
    cookies = kwargs.pop('cookies', {})
    params = kwargs.pop('params', {})
    scraper = cloudscraper.CloudScraper()
    if Prefs['captcha_enable']:
        scraper.captcha = {
            'provider': Prefs['captcha_type'],
            'api_key': Prefs['captcha_key']
        }
    scraper.headers.update(headers)
    scraper.cookies.update(cookies)
    req = scraper.request(method, url, data=params)
    return req
def bsGetBalanceAtTime(balance_date, address):
    if not w3.isAddress(address):
        raise KeyError('Invalid address!')
    # This conversion is needed while testing outside of Django
    # balance_date = datetime.strptime(balance_date, '%Y-%m-%d')
    if balance_date > date.today():
        raise ValueError('Date is in the future!')
    dt = correctDateFormating(balance_date)
    scraper = cloudscraper.CloudScraper()  # Bypassing Cloudflare
    # Populating required parameters for form submission
    rp = scraper.get('https://etherscan.io/balancecheck-tool')
    sp = bs(rp.text, 'html.parser')
    event_target = sp.find(id="__EVENTTARGET").get('value')
    event_argument = sp.find(id="__EVENTARGUMENT").get('value')
    view_state = sp.find(id="__VIEWSTATE").get('value')
    view_state_gen = sp.find(id="__VIEWSTATEGENERATOR").get('value')
    event_validation = sp.find(id="__EVENTVALIDATION").get('value')
    params = {
        '__EVENTTARGET': event_target,
        '__EVENTARGUMENT': event_argument,
        '__VIEWSTATE': view_state,
        '__VIEWSTATEGENERATOR': view_state_gen,
        '__EVENTVALIDATION': event_validation,
        'ctl00$ContentPlaceHolder1$txtAddress': address,
        'date': dt,
        'ctl00$ContentPlaceHolder1$txtBlockNo': '',
        'ctl00$ContentPlaceHolder1$Button1': 'Lookup'
    }
    # Scraping the response for the Eth balance
    response = scraper.post('https://etherscan.io/balancecheck-tool', data=params)
    soup = bs(response.text, 'html.parser')
    # Saving the HTML for testing
    with open('test.html', 'w') as f:
        f.write(response.text)
    balance = soup.find("span", {"class": "text-size-1 text-break"}).text.split()
    # To save this in the database, we must remove the text, so we can convert the balance
    ether_balance = Decimal(balance[0].replace(',', ''))
    return ether_balance
def __init__(self, email, password, auth_domain):
    """Init module."""
    self.email = email
    self.password = password
    self.req = httpx.AsyncClient(timeout=TIME_OUT)
    self.cf_req = cloudscraper.CloudScraper()
    self.URL_DICT = (
        GARMIN_CN_URL_DICT
        if auth_domain and str(auth_domain).upper() == "CN"
        else GARMIN_COM_URL_DICT
    )
    self.modern_url = self.URL_DICT.get("MODERN_URL")
    self.headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
        "origin": self.URL_DICT.get("SSO_URL_ORIGIN"),
    }
def manual_login():
    for retry in range(0, 2):
        s = cloudscraper.CloudScraper()
        r = s.get("https://www.udemy.com/join/signup-popup/")
        soup = bs(r.text, "html5lib")
        csrf_token = soup.find("input", {"name": "csrfmiddlewaretoken"})["value"]
        data = {
            "csrfmiddlewaretoken": csrf_token,
            "locale": "en_US",
            "email": "*****@*****.**",
            "password": "******",
        }
        s.headers.update(
            {"Referer": "https://www.udemy.com/join/signup-popup/"})
        try:
            r = s.post(
                "https://www.udemy.com/join/login-popup/?locale=en_US",
                data=data,
                allow_redirects=False,
            )
        except cloudscraper.exceptions.CloudflareChallengeError:
            continue
        if r.status_code == 302:
            return "", r.cookies["client_id"], r.cookies["access_token"], csrf_token
        else:
            soup = bs(r.content, "html5lib")
            txt = soup.find(
                "div", class_="alert alert-danger js-error-alert").string.strip()
            if txt[0] == "Y":
                return "Too many logins per hour try later", "", "", ""
            elif txt[0] == "T":
                return "Email or password incorrect", "", "", ""
            else:
                return txt, "", "", ""
    return "Cloudflare is blocking your requests try again after an hour", "", "", ""
def GetWebContent(self, bangou):
    # XXX: improve
    link = "https://www.r18.com/common/search/searchword=" + bangou
    scraper = cloudscraper.CloudScraper()
    response = scraper.get(link)
    self.soup = BeautifulSoup(response.text, "html.parser")
    item_list = self.soup.select(".item-list")
    # print(item_list)
    if not item_list:
        return ""
    try:
        # TODO: check try range
        for item in item_list:
            link = item.select_one('a')['href']
            # print(link)
            response = scraper.get(link)
            self.soup = BeautifulSoup(response.text, "html.parser")
            infos = self.soup.select_one(".product-details").dl
            print('infos', infos)
            # print('infos children', infos.children)
            self.infoDict = dict()
            key = ''
            value = ''
            for child in infos.children:
                print(child)
                if child.name == 'dt':
                    key = child.getText()
                elif child.name == 'dd':
                    value = child.getText()
                if key and value:
                    print(key, value)
                    self.infoDict[key.strip()] = value.strip()
                    key = ''
                    value = ''
            # if self.infoDict['ID'] and self.infoDict['ID'] == bangou:
            #     return link
    except Exception as ex:
        # TODO: get web content failed
        print(ex)
        return link
    return link
def getSession(pageNum):
    zoomInfoSessionFirefox = cloudscraper.CloudScraper(browser={
        'browser': 'firefox',
        'mobile': False,
        'platform': 'windows'
    })
    getURL = str(str(companyLink) + '?pageNum=' + str(pageNum))
    try:
        zoomInfoGetPage = zoomInfoSessionFirefox.get(getURL, allow_redirects=True)
    except cloudscraper.exceptions.CloudflareChallengeError as e:
        print('\n[-] Error encountered, retrying request...\n')
        time.sleep(1)
        try:
            zoomInfoGetPage = zoomInfoSessionFirefox.get(getURL, allow_redirects=True)
        except:
            print('\n[-] Unrecoverable error, exiting...\n')
            pass
    return zoomInfoGetPage
def __init__(self):
    """Create a new Download class instance."""
    logger.debug("__init__")
    self.session = cloudscraper.CloudScraper()
    self.sso_rest_client = RestClient(
        self.session, 'sso.garmin.com', 'sso', aditional_headers=self.garmin_headers)
    self.modern_rest_client = RestClient(
        self.session, 'connect.garmin.com', 'modern', aditional_headers=self.garmin_headers)
    self.activity_service_rest_client = RestClient.inherit(
        self.modern_rest_client, "proxy/activity-service/activity")
    self.download_service_rest_client = RestClient.inherit(
        self.modern_rest_client, "proxy/download-service/files")
    self.gc_config = GarminConnectConfigManager()
    # Existing downloaded data will be redownloaded and overwritten if it is
    # within this number of days of now.
    self.download_days_overlap = 3
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.wg = WebRequest.WebGetRobust(chromium_headless=False)
    # This is.... kind of horrible.
    self.wg.errorOutCount = 1
    # proxy = SocksProxy.ProxyLauncher([TwoCaptchaSolver.TWOCAPTCHA_IP])
    recaptcha_params = {
        'provider': 'anticaptcha',
        'api_key': settings["captcha"]["anti-captcha"]['api_key'],
        # 'proxy': proxy.get_wan_address(),
        # 'proxytype': "SOCKS5",
    }
    self.req = cloudscraper.CloudScraper(
        recaptcha=recaptcha_params,
    )
    self.req.headers.update(self.wg.browserHeaders)
def parse_page(self, url):
    # self.logger.info(f'Parsing started for: {url}')
    print(f'Parsing started for: {url}')
    # login and start the session
    scraper = cloudscraper.CloudScraper()
    self.login()
    scraper.post(url=self.login_url, data=self.credentials)
    # create the BeautifulSoup object
    soup = BeautifulSoup(
        scraper.get(url=url, headers=self.headers).content, 'html5lib')
    # identify the set and change to directory for it
    self.identify_set(soup)
    # find all the images linked to on the page
    urls = self.find_image_links(url, soup)
    if urls is not None:
        # download the images
        self.download_files_parallel(urls)
    else:
        print("no images found")
    # find all the videos linked to on the page
    urls = self.find_video_links(url, soup)
    if urls is not None:
        # download the videos
        super().download_files_parallel(urls)
    else:
        print("no videos found")
    # self.logger.info(f'Parsing complete for: {url}')
    print(f'Parsing complete for: {url}')
# stock data
stock_code = []
stock_name = []
stock_listingDate = []
stock_shares = []
stock_listingBoard = []

# LQ45 data
lq45_code = []
lq45_name = []
lq45_listingDate = []
lq45_shares = []
lq45_listingBoard = []

# HTTP client
http = cloudscraper.CloudScraper()

while True:
    # build the request URL
    link = f"https://idx.co.id/umbraco/Surface/StockData/GetSecuritiesStock?code=&sector=&board=&start={start}&length={length}"
    # send the request
    result = http.get(link).text
    result = json.loads(result)
    # is the result empty?
    # if so, the list of issuers has been exhausted
    if result["data"] == []:
        break
    else:
def __init__(self, email, password, is_cn=False, session_data=None):
    """Create a new class instance."""
    self.session_data = session_data
    self.username = email
    self.password = password
    self.is_cn = is_cn

    self.garmin_connect_base_url = "https://connect.garmin.com"
    self.garmin_connect_sso_url = "sso.garmin.com/sso"
    self.garmin_connect_modern_url = "connect.garmin.com/modern"
    self.garmin_connect_css_url = "https://static.garmincdn.com/com.garmin.connect/ui/css/gauth-custom-v1.2-min.css"

    if self.is_cn:
        self.garmin_connect_base_url = "https://connect.garmin.cn"
        self.garmin_connect_sso_url = "sso.garmin.cn/sso"
        self.garmin_connect_modern_url = "connect.garmin.cn/modern"
        self.garmin_connect_css_url = "https://static.garmincdn.cn/cn.garmin.connect/ui/css/gauth-custom-v1.2-min.css"

    self.garmin_connect_login_url = self.garmin_connect_base_url + "/en-US/signin"
    self.garmin_connect_sso_login = "******"

    self.garmin_connect_devices_url = (
        "proxy/device-service/deviceregistration/devices")
    self.garmin_connect_device_url = "proxy/device-service/deviceservice"
    self.garmin_connect_weight_url = "proxy/weight-service/weight/dateRange"
    self.garmin_connect_daily_summary_url = (
        "proxy/usersummary-service/usersummary/daily")
    self.garmin_connect_metrics_url = "proxy/metrics-service/metrics/maxmet/daily"
    self.garmin_connect_daily_hydration_url = (
        "proxy/usersummary-service/usersummary/hydration/daily")
    self.garmin_connect_personal_record_url = (
        "proxy/personalrecord-service/personalrecord/prs")
    self.garmin_connect_earned_badges_url = (
        "proxy/badge-service/badge/earned")
    self.garmin_connect_adhoc_challenges_url = (
        "proxy/adhocchallenge-service/adHocChallenge/historical")
    self.garmin_connect_badge_challenges_url = (
        "proxy/badgechallenge-service/badgeChallenge/completed")
    self.garmin_connect_daily_sleep_url = (
        "proxy/wellness-service/wellness/dailySleepData")
    self.garmin_connect_daily_stress_url = "proxy/wellness-service/wellness/dailyStress"
    self.garmin_connect_rhr = "proxy/userstats-service/wellness/daily"
    self.garmin_connect_user_summary_chart = (
        "proxy/wellness-service/wellness/dailySummaryChart")
    self.garmin_connect_heartrates_daily_url = (
        "proxy/wellness-service/wellness/dailyHeartRate")
    self.garmin_connect_daily_respiration_url = (
        "proxy/wellness-service/wellness/daily/respiration")
    self.garmin_connect_daily_spo2_url = (
        "proxy/wellness-service/wellness/daily/spo2")
    self.garmin_connect_activities = (
        "proxy/activitylist-service/activities/search/activities")
    self.garmin_connect_activity = "proxy/activity-service/activity"

    self.garmin_connect_fit_download = "proxy/download-service/files/activity"
    self.garmin_connect_tcx_download = "proxy/download-service/export/tcx/activity"
    self.garmin_connect_gpx_download = "proxy/download-service/export/gpx/activity"
    self.garmin_connect_kml_download = "proxy/download-service/export/kml/activity"
    self.garmin_connect_csv_download = "proxy/download-service/export/csv/activity"

    self.garmin_connect_gear = "proxy/gear-service/gear/filterGear"
    self.garmin_connect_logout = "auth/logout/?url="

    self.garmin_headers = {"NK": "NT"}

    self.session = cloudscraper.CloudScraper()
    self.sso_rest_client = ApiClient(
        self.session,
        self.garmin_connect_sso_url,
        aditional_headers=self.garmin_headers,
    )
    self.modern_rest_client = ApiClient(
        self.session,
        self.garmin_connect_modern_url,
        aditional_headers=self.garmin_headers,
    )

    self.display_name = None
    self.full_name = None
    self.unit_system = None
def test_js_challenge_21_05_2015(self, **kwargs):
    scraper = cloudscraper.CloudScraper(**kwargs)
    expect(scraper.get(url).content).to.equal(requested_page)
from bs4 import BeautifulSoup
import requests, re
from time import sleep
import cloudscraper
import os.path

scraper = cloudscraper.CloudScraper(delay=5, browser={
    "browser": "chrome",
    "platform": "windows",
    "mobile": False,
    "desktop": True,
})

base_url = "https://www.fanfiction.net/"
genres = ("Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
          "Friendship", "General", "Horror", "Humor", "Hurt", "Mystery",
          "Parody", "Poetry", "Romancy", "Sci-Fi", "Spiritual", "Supernatural",
          "Suspense", "Tragedy", "Western")


class Scraper:
    def get_categories(self):
        categories = []
        html_url = scraper.get(base_url).text
        soup = BeautifulSoup(html_url, "lxml")
        find_categories_container = soup.find(class_="dropdown-menu")
        find_categories = find_categories_container.find_all("a")
def _get_session(self, record=None, email=None, password=None):
    session = cloudscraper.CloudScraper()

    # JSIG CAS, cool I guess.
    # Not quite OAuth though, so I'll continue to collect raw credentials.
    # Commented stuff left in case this ever breaks because of missing parameters...
    data = {
        'username': email,
        'password': password,
        '_eventId': 'submit',
        'embed': 'true',
        # 'displayNameRequired': 'false'
    }
    params = {
        'service': 'https://connect.garmin.com/modern',
        # 'redirectAfterAccountLoginUrl': 'http://connect.garmin.com/modern',
        # 'redirectAfterAccountCreationUrl': 'http://connect.garmin.com/modern',
        # 'webhost': 'olaxpw-connect00.garmin.com',
        'clientId': 'GarminConnect',
        'gauthHost': 'https://sso.garmin.com/sso',
        # 'rememberMeShown': 'true',
        # 'rememberMeChecked': 'false',
        'consumeServiceTicket': 'false',
        # 'id': 'gauth-widget',
        # 'embedWidget': 'false',
        # 'cssUrl': 'https://static.garmincdn.com/com.garmin.connect/ui/src-css/gauth-custom.css',
        # 'source': 'http://connect.garmin.com/en-US/signin',
        # 'createAccountShown': 'true',
        # 'openCreateAccount': 'false',
        # 'usernameShown': 'true',
        # 'displayNameShown': 'false',
        # 'initialFocus': 'true',
        # 'locale': 'en'
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        'Referer': 'https://jhartman.pl',
        'origin': 'https://sso.garmin.com'
    }

    # I may never understand what motivates people to mangle a perfectly good
    # protocol like HTTP in the ways they do...
    preResp = session.get('https://sso.garmin.com/sso/signin', params=params, headers=headers)
    if preResp.status_code != 200:
        raise APIException('SSO prestart error %s %s' % (preResp.status_code, preResp.text))

    ssoResp = session.post('https://sso.garmin.com/sso/login', params=params, data=data,
                           allow_redirects=False, headers=headers)
    if ssoResp.status_code != 200 or 'temporarily unavailable' in ssoResp.text:
        raise APIException('SSO error %s %s' % (ssoResp.status_code, ssoResp.text))

    if '>sendEvent(\'FAIL\')' in ssoResp.text:
        raise APIException('Invalid login')
    if '>sendEvent(\'ACCOUNT_LOCKED\')' in ssoResp.text:
        raise APIException('Account Locked')
    if 'renewPassword' in ssoResp.text:
        raise APIException('Reset password')

    # self.print_cookies(cookies=session.cookies)

    # ...AND WE'RE NOT DONE YET!
    gcRedeemResp = session.get('https://connect.garmin.com/modern',
                               allow_redirects=False, headers=headers)
    if gcRedeemResp.status_code != 302:
        raise APIException(f'GC redeem-start error {gcRedeemResp.status_code} {gcRedeemResp.text}')

    url_prefix = 'https://connect.garmin.com'
    # There are 6 redirects that need to be followed to get the correct cookie
    # ... :(
    max_redirect_count = 7
    current_redirect_count = 1
    while True:
        url = gcRedeemResp.headers['location']
        # Fix up relative redirects.
        if url.startswith('/'):
            url = url_prefix + url
        url_prefix = '/'.join(url.split('/')[:3])
        gcRedeemResp = session.get(url, allow_redirects=False)

        if (current_redirect_count >= max_redirect_count
                and gcRedeemResp.status_code != 200):
            raise APIException(f'GC redeem {current_redirect_count}/'
                               f'{max_redirect_count} error '
                               f'{gcRedeemResp.status_code} '
                               f'{gcRedeemResp.text}')
        if gcRedeemResp.status_code in [200, 404]:
            break
        current_redirect_count += 1
        if current_redirect_count > max_redirect_count:
            break

    # self.print_cookies(session.cookies)
    session.headers.update(headers)
    return session
if sys.version_info[0] > 2:
    from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException
else:
    from requests import HTTPError, ConnectionError, Timeout, RequestException

import copy, re
import xbmc, xbmcgui, xbmcvfs
from strings import *
from serviceLib import *
import cloudscraper
from contextlib import contextmanager

sess = cloudscraper.create_scraper()
scraper = cloudscraper.CloudScraper()

serviceName = 'playlist'
playlists = [
    'playlist_1', 'playlist_2', 'playlist_3', 'playlist_4', 'playlist_5'
]


class PlaylistUpdater(baseServiceUpdater):
    def __init__(self, instance_number):
        self.serviceName = serviceName + "_{}".format(instance_number)
        self.instance_number = str(instance_number)
        self.localMapFile = 'playlistmap.xml'
        baseServiceUpdater.__init__(self)
        self.servicePriority = int(
import re
import enum
import time
from contextlib import suppress

# from urllib.request import Request, urlopen

from bs4 import BeautifulSoup
import cloudscraper
from fuzzywuzzy import fuzz, process

# constants
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
}
SITE_DOMAIN = "https://subscene.com"

request_session = cloudscraper.CloudScraper()


# utils
def soup_for(url):
    url = re.sub(r"\s", "+", url)
    # r = Request(url, data=None, headers=HEADERS)
    # html = urlopen(r).read().decode("utf-8")
    html = request_session.get(url, headers=HEADERS)
    return BeautifulSoup(html.content, "html.parser")


class AttrDict():
    def __init__(self, *attrs):
        self._attrs = attrs
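A possible call to `soup_for` (whitespace in the query is turned into `+` by the `re.sub` above); the search path and the `.title a` selector are assumptions for illustration, not confirmed parts of the site's markup.

# Illustrative only: fetch a search page on SITE_DOMAIN and print result links.
results = soup_for(SITE_DOMAIN + "/subtitles/searchbytitle?query=the matrix")
for title in results.select(".title a"):
    print(title.get_text(strip=True))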
def get_story_metadata(self):
    if re.search(URL_VALIDATE, self.BaseUrl):
        logger.info(f"Processing {self.BaseUrl}")
        self.scraper = cloudscraper.CloudScraper(delay=2, browser={
            'browser': 'chrome',
            'platform': 'windows',
            'mobile': False,
            'desktop': True,
        })
        response = self.scraper.get(self.BaseUrl)
        logger.debug(f"GET: {response.status_code}: {response.url}")
        ffn_soup = BeautifulSoup(response.content, 'html.parser')
        try:
            self.ffn_story_name = ffn_soup.find_all(
                'b', 'xcontrast_txt')[0].string.strip()
        except IndexError:  # Story Not Found
            logger.error("ffn_story_name is missing.")
            self.ffn_story_name = None
            return
        self.ffn_story_id = (re.search(r"\d+", self.BaseUrl)).group(0)
        self.ffn_author_name = ffn_soup.find_all(
            'a', {'href': re.compile(r'^/u/\d+/.')})[0].string.strip()
        self.ffn_author_url = (ffn_soup.find('div', attrs={
            'id': 'profile_top'
        }).find('a', href=True))['href']
        self.ffn_author_id = (re.search(r"\d+", self.ffn_author_url)).group(0)
        try:
            self.ffn_story_summary = ffn_soup.find_all(
                'div', {
                    'style': 'margin-top:2px',
                    'class': 'xcontrast_txt'
                })[0].string.strip()
        except IndexError:  # Missing summary
            logger.error("ffn_story_summary is missing.")
            self.ffn_story_summary = ""
        self.ffn_story_fandom = ffn_soup.find('span', attrs={
            'class': 'lc-left'
        }).find('a', attrs={
            'class': 'xcontrast_txt'
        }).text
        # if the fandom isn't Crossover, then go to the next <a>
        if not re.search(r"\bcrossover\b", self.ffn_story_fandom, re.IGNORECASE):
            self.ffn_story_fandom = ffn_soup.find('span', attrs={
                'class': 'lc-left'
            }).find('a', attrs={
                'class': 'xcontrast_txt'
            }).findNext('a').text
        self.details = ffn_soup.find_all(
            'span', {'class': 'xgray xcontrast_txt'})[0].text.split(' - ')
        self.dates = [
            date for date in ffn_soup.find_all('span')
            if date.has_attr('data-xutime')
        ]
        for i in range(0, len(self.details)):
            if self.details[i].startswith('Updated:'):
                self.ffn_story_status = "In-Progress"
                self.ffn_story_last_updated = datetime.fromtimestamp(
                    int(self.dates[0]['data-xutime']))
                self.ffn_story_published = datetime.fromtimestamp(
                    int(self.dates[1]['data-xutime']))  # Published date
                # change formatting
                self.ffn_story_last_updated = self.ffn_story_last_updated.strftime(
                    r'%Y-%m-%d')
                self.ffn_story_published = self.ffn_story_published.strftime(
                    r'%Y-%m-%d')
                break  # if found, exit the loop to prevent overwriting of the variable
            elif self.details[i].startswith('Published:'):
                self.ffn_story_status = "Completed"
                # if Updated is not found, published & last updated will be the same
                self.ffn_story_last_updated = str(
                    datetime.fromtimestamp(
                        int(self.dates[0]['data-xutime'])))  # Published date
                self.ffn_story_published = str(
                    datetime.fromtimestamp(
                        int(self.dates[0]['data-xutime'])))  # Published date
                # change formatting
                self.ffn_story_last_updated = datetime.strptime(
                    self.ffn_story_last_updated, '%Y-%m-%d %H:%M:%S')
                self.ffn_story_published = datetime.strptime(
                    self.ffn_story_published, '%Y-%m-%d %H:%M:%S')
                self.ffn_story_last_updated = self.ffn_story_last_updated.strftime(
                    r'%-d %b, %Y ')
                self.ffn_story_published = self.ffn_story_published.strftime(
                    r'%-d %b, %Y ')
        for i in range(0, len(self.details)):
            if self.details[i].startswith('Reviews:'):
                self.ffn_story_reviews = self.details[i].replace(
                    'Reviews:', '').strip()
                break  # if found, exit the loop to prevent overwriting of the variable
            else:
                self.ffn_story_reviews = 'Not found'
        for i in range(0, len(self.details)):
            if self.details[i].startswith('Favs:'):
                self.ffn_story_favs = self.details[i].replace('Favs:', '').strip()
                break  # if found, exit the loop to prevent overwriting of the variable
            else:
                self.ffn_story_favs = 'Not found'
        for i in range(0, len(self.details)):
            if self.details[i].startswith('Follows:'):
                self.ffn_story_follows = self.details[i].replace(
                    'Follows:', '').strip()
                break  # if found, exit the loop to prevent overwriting of the variable
            else:
                self.ffn_story_follows = 'Not found'
        for i in range(0, len(self.details)):
            if self.details[i].startswith('Rated:'):
                self.ffn_story_rating = self.details[i].replace(
                    'Rated:', '').strip()
                break  # if found, exit the loop to prevent overwriting of the variable
            else:
                self.ffn_story_rating = 'Not found'
        self.ffn_story_lang = self.details[1]
        self.ffn_story_genre = self.details[2]
        self.ffn_story_characters = self.details[3]
        if re.search(r'\d', str(self.ffn_story_characters)):
            self.ffn_story_characters = "Not Found"
        search = [x for x in self.details if x.startswith("Words:")]
        if len(search) == 0:
            self.ffn_story_length = 0
        else:
            self.ffn_story_length = int(search[0][len("Words:"):].replace(',', ''))
            self.ffn_story_length = "{:,}".format(int(self.ffn_story_length))
        search = [x for x in self.details if x.startswith("Chapters:")]
        if len(search) == 0:
            self.ffn_story_chapters = 1  # 1 as the default chapter number
        else:
            self.ffn_story_chapters = str(
                int(search[0][len("Chapters:"):].replace(',', ''))).strip()
        self.ffn_author_url = "https://www.fanfiction.net" + self.ffn_author_url
        # remove everything after &sa from the BaseUrl
        if re.search(r"^(.*?)&", self.BaseUrl) is not None:
            self.BaseUrl = re.search(r"^(.*?)&", self.BaseUrl).group(1)
    else:
        logger.error("BaseUrl is invalid")
password = namespace.password
if not credentials:
    if not username:
        username = input("Username:"******"Password:"******"the dir specified does not exists: %s", CONFIG_DIR)
    mkdir(CONFIG_DIR)
if namespace.save:
    with open(path.join(CONFIG_DIR, "accounts.json"), 'w', encoding='utf-8') as f:
        json.dump(accountInfo, f, indent=4)
    exit(0)
for username, password in credentials:
    crunchyrollGuestPassFinder = CrunchyrollGuestPassFinder(cloudscraper.CloudScraper())
    if crunchyrollGuestPassFinder.login(username, password) and not namespace.dry_run:
        logging.info("logged into %s", username)
        if crunchyrollGuestPassFinder.isAccountNonPremium():
            crunchyrollGuestPassFinder.findGuestPassAndActivateAccount()
        else:
            logging.info("Account '%s' is already premium", username)