Example #1
def bsGetAllTokens(account_address):
    if not w3.isAddress(account_address):
        raise KeyError('Invalid address!')
    url = f'https://etherscan.io/address/{account_address}'
    scraper = cloudscraper.CloudScraper()
    response = scraper.get(url)
    soup = bs(response.text, 'html.parser')
    if not soup.find(id="ContentPlaceHolder1_tokenbalance"):
        raise ValueError('No tokens found on this account!')

    token_list = soup.find("ul", {"class": "list list-unstyled mb-0"})
    tokens = token_list.find_all(
        "span",
        {"class": "list-amount link-hover__item hash-tag hash-tag--md text-truncate"})
    result = []
    for token in tokens:
        token_obj = token.text.split(maxsplit=1)
        token_obj[0] = Decimal(token_obj[0].replace(',', ''))
        result.append(token_obj)

    with open('token_list.txt', 'w') as f:
        s = ''
        for token in result:
            s += f'{token[0]} of {token[1]}\n'
        f.write(s)

    return result
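
This snippet relies on module-level names (w3, bs, Decimal, cloudscraper) defined elsewhere in the source file; a minimal setup sketch, assuming web3.py v5 and BeautifulSoup 4:

from decimal import Decimal

import cloudscraper
from bs4 import BeautifulSoup as bs
from web3 import Web3

w3 = Web3()  # no provider is needed just for address validation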
Example #2
def bsGetTokenBalanceAtTime(balance_date, account_address, contract_address):
    #Checking if account address is okay
    if not w3.isAddress(account_address):
        raise KeyError('Invalid account address!')

    # Calling the url for given address to check if it's valid
    url = f'https://api.etherscan.io/api?module=contract&action=getabi&address={contract_address}&apikey={apiToken}'
    response = requests.get(url).json()

    if response['status'] != "1":
        raise ConnectionError('Invalid contract!')

    ##Use this if statement and date conversion outside of django
    #balance_date = datetime.strptime(balance_date, '%Y-%m-%d')
    #if balance_date >  datetime.now():

    #Use this for django
    if balance_date > date.today():
        raise ValueError('Date is in the future!')

    dt = correctDateFormating(balance_date)
    scraper = cloudscraper.CloudScraper()  #Bypassing cloudflare

    #Populating required parameters for form submission
    rp = scraper.get('https://etherscan.io/tokencheck-tool')
    sp = bs(rp.text, 'html.parser')
    event_target = sp.find(id="__EVENTTARGET").get('value')
    event_argument = sp.find(id="__EVENTARGUMENT").get('value')
    view_state = sp.find(id="__VIEWSTATE").get('value')
    view_state_gen = sp.find(id="__VIEWSTATEGENERATOR").get('value')
    event_validation = sp.find(id="__EVENTVALIDATION").get('value')
    params = {
        '__EVENTTARGET': event_target,
        '__EVENTARGUMENT': event_argument,
        '__VIEWSTATE': view_state,
        '__VIEWSTATEGENERATOR': view_state_gen,
        '__EVENTVALIDATION': event_validation,
        'ctl00$ContentPlaceHolder1$tokenbalance': 'tokenbalance',
        'ctl00$ContentPlaceHolder1$txtAccount': account_address,
        'ctl00$ContentPlaceHolder1$txtAddress': contract_address,
        'date': dt,
        'ctl00$ContentPlaceHolder1$txtBlockNo': '',
        'ctl00$ContentPlaceHolder1$Button1': 'Lookup'
    }

    #Scraping the response for the token balance
    response = scraper.post('https://etherscan.io/tokencheck-tool',
                            data=params)
    soup = bs(response.text, 'html.parser')

    #Taking the token info from soup
    token = soup.find_all("span", {"class": "text-size-1"})[2].text.split()
    token_balance = Decimal(token[0].replace(',', ''))
    token_name = token[1]

    result_dict = {
        'token_name': token_name,
        'token_balance': token_balance,
    }
    return result_dict
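
The five __EVENTTARGET/__VIEWSTATE-style inputs above are the standard ASP.NET WebForms hidden fields and must be echoed back for the POST to be accepted. A reusable helper sketch, assuming the same BeautifulSoup setup (get_aspnet_fields is a hypothetical name):

ASPNET_FIELDS = ('__EVENTTARGET', '__EVENTARGUMENT', '__VIEWSTATE',
                 '__VIEWSTATEGENERATOR', '__EVENTVALIDATION')

def get_aspnet_fields(soup):
    # Collect the hidden WebForms fields in one pass.
    return {name: soup.find(id=name).get('value') for name in ASPNET_FIELDS}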
Example #3
    def make_session(self):
        logger.debug("initializing cloudscraper")
        return cloudscraper.CloudScraper(browser={
            'browser': 'chrome',
            'platform': 'windows',
            'mobile': False,
            'desktop': True,
        })
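
The browser dict controls which User-Agent family cloudscraper impersonates. A usage sketch (the downloader instance and target URL are hypothetical):

session = downloader.make_session()
response = session.get('https://example.com')
print(response.status_code)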
Example #4
    def __init__(self, headers, TOKEN):
        self.scraper = cloudscraper.CloudScraper()
        self.headers = {}
        self.TOKEN = TOKEN
        self.headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0"
        self.headers['Cookie'] = headers
        self.SEC = self.getSEC()
        self.JavaSoftwares = ['Vanilla', 'Spigot', 'Forge', 'Magma', 'Snapshot', 'Bukkit', 'Paper', 'Modpacks', 'Glowstone']
        self.BedrockSoftwares = ['Bedrock', 'Pocketmine-MP']
Example #5
    def __init__(self, threadNum=5):
        self.plugins = load_plugins()
        self.threadNum = threadNum
        self.items = Queue()
        self.tempdata = {}
        self.images = {}

        self.session = cloudscraper.CloudScraper()
        self.block_size = 1024  # 1 KB
Example #6
    def __init__(self,
                 username,
                 password,
                 twofac=None,
                 cookie=None,
                 config=None):
        """
		The bumper object itself. If username/password are left empty
		but a cookie is provided, it will log in with that instead.

		Within this __init__ function the logger and cloudflare scraper
		will be started, and the program will check if the login details
		are correct or not.

		:param username: OGU account name
		:param password: OGU password
		:param twofac: current 2FA code
		:param cookie: `ogusersbbuser` cookie

		:return: none
		"""
        self.logger = logging.getLogger(__name__)

        self.logger.debug("Initializing data")
        self.__data = Data()

        if config:
            self.logger.debug(
                f"Loading config from argument with type '{type(config)}'")
            self.config = config
        else:
            self.logger.warning("Make sure to set a config before continuing")

        self.logger.debug("Initializing the session")

        session_data = {"browser": {"browser": "chrome", "desktop": True}}

        if self.config.get("captcha"):
            self.logger.debug("Loading CAPTCHA login info")
            session_data["recaptcha"] = self.config["captcha"]

        self.session = cloudscraper.CloudScraper(**session_data)

        if cookie:
            self.logger.debug("Loading user from cookie")
            self.session.cookies["ogusersmybbuser"] = cookie
            if not self.logged_in:
                raise InvalidUser("Incorrect login details", username,
                                  password)
        else:
            self.logger.debug("Loading user from username and password")
            if not self.login(username, password, twofac=twofac):
                raise InvalidUser("Incorrect login details", username,
                                  password)

        self.logger.info("Initialized the bumper successfully")
Example #7
    def run(self):
        Analyzer.run(self)
        if self.data_type == 'ip':
            try:
                data = self.get_data()

                scraper = cloudscraper.CloudScraper()

                headers = {
                    'Host': 'talosintelligence.com',
                    'Referer': 'https://talosintelligence.com/reputation_center/lookup?search={}'.format(data),
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
                    'Accept': 'application/json'
                }

                response_details = scraper.get(
                    'https://talosintelligence.com/sb_api/query_lookup',
                    headers=headers,
                    params={
                        'query': '/api/v2/details/ip/',
                        'query_entry': data
                    })

                response_location = scraper.get(
                    'https://talosintelligence.com/sb_api/query_lookup',
                    headers=headers,
                    params={
                        'query': '/api/v2/location/ip/',
                        'query_entry': data
                    })

                if response_details.status_code in (200, 201):
                    if response_location.status_code in (200, 201):
                        result = response_details.json()
                        result['country'] = response_location.json().get(
                            'country', None)
                        self.report(result if len(result) > 0 else {})
                    else:
                        self.error(
                            'Failed to query Talos location. Status_code {}'.
                            format(response_location.status_code))
                else:
                    self.error(
                        'Failed to query Talos details. Status_code {}'.format(
                            response_details.status_code))
            except Exception as e:
                self.unexpectedError(e)
        else:
            self.notSupported()
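
Note: a check written as status_code == 200 | 201 would compare against 201 only, because bitwise | binds tighter than ==; the membership test used above avoids that pitfall:

assert (200 | 201) == 201           # bitwise OR runs first
assert not (200 == 200 | 201)       # parsed as 200 == (200 | 201)
assert 200 in (200, 201)            # what was actually intended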
Example #8
    def EnableCloudscraper(self, enable: bool = True):

        ##
        #
        # Enables/disables the cloudscraper. Resets the session.
        #
        # @param enable Enable the cloudscraper?
        #
        ##

        self._session = cloudscraper.CloudScraper() if enable else Session()
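
A usage sketch for toggling the bypass on an existing instance (client is a hypothetical name; Session is requests.Session):

client.EnableCloudscraper(True)   # subsequent requests go through cloudscraper
client.EnableCloudscraper(False)  # back to a plain requests.Session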
Example #9
def cloudScraperRequest(url, method, **kwargs):
    headers = kwargs.pop('headers', {})
    cookies = kwargs.pop('cookies', {})
    params = kwargs.pop('params', {})
    global scraper

    if scraper is None:
        scraper = cloudscraper.CloudScraper()
        scraper.headers.update(headers)
        scraper.cookies.update(cookies)

    req = scraper.request(method, url, data=params)

    return req
Example #10
    def _connect(self):
        self.session = cloudscraper.CloudScraper()
        self.session.cookies = http.cookiejar.LWPCookieJar(self.cookie_jar)

        if os.path.isfile(self.cookie_jar):
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)

        response = self.session.get(
            "https://connect.garmin.com/modern/settings",
            allow_redirects=False)
        if response.status_code != 200:
            self._LOG.info("Authenticate user '%s'", self.username)
            self._authenticate()
        else:
            self._LOG.info("User '%s' already authenticated", self.username)
Example #11
    def __init__(self, email, password):
        """Create a new class instance."""
        self.username = email
        self.password = password
        self.session = cloudscraper.CloudScraper()
        self.sso_rest_client = ApiClient(self.session,
                                         "sso.garmin.com/sso",
                                         aditional_headers=self.garmin_headers)
        self.modern_rest_client = ApiClient(
            self.session,
            "connect.garmin.com/modern",
            aditional_headers=self.garmin_headers,
        )

        self.display_name = None
Example #12
    def __init__(self, email, password, is_cn=False):
        """
        Init module
        """
        global BASE_URL
        global SSO_URL
        global SIGNIN_URL
        self.email = email
        self.password = password
        self.req = cloudscraper.CloudScraper()
        self.logger = logging.getLogger(__name__)
        self.display_name = ""
        self.full_name = ""
        self.unit_system = ""
        self.is_cn = is_cn
        if is_cn:
            BASE_URL = BASE_URL.replace(".com", ".cn")
            SSO_URL = SSO_URL.replace(".com", ".cn")
            SIGNIN_URL = SIGNIN_URL.replace(".com", ".cn")

        self.url_user_summary = BASE_URL + '/proxy/usersummary-service/usersummary/daily/'
        self.url_user_summary_chart = BASE_URL + \
            '/proxy/wellness-service/wellness/dailySummaryChart/'
        self.url_heartrates = BASE_URL + '/proxy/wellness-service/wellness/dailyHeartRate/'
        self.url_sleepdata = BASE_URL + '/proxy/wellness-service/wellness/dailySleepData/'
        self.url_body_composition = BASE_URL + \
            '/proxy/weight-service/weight/daterangesnapshot'
        self.url_activities = BASE_URL + \
            '/proxy/activitylist-service/activities/search/activities'
        self.url_hydrationdata = BASE_URL + '/proxy/usersummary-service/usersummary/hydration/daily/'
        self.url_activity = BASE_URL + '/proxy/activity-service/activity/'
        self.url_personal_record = BASE_URL + '/proxy/personalrecord-service/personalrecord/'
        self.url_tcx_download = BASE_URL + "/proxy/download-service/export/tcx/activity/"
        self.url_gpx_download = BASE_URL + "/proxy/download-service/export/gpx/activity/"
        self.url_kml_download = BASE_URL + "/proxy/download-service/export/kml/activity/"
        self.url_fit_download = BASE_URL + "/proxy/download-service/files/activity/"
        self.url_csv_download = BASE_URL + "/proxy/download-service/export/csv/activity/"
        self.url_device_list = BASE_URL + '/proxy/device-service/deviceregistration/devices'
        self.url_device_service = BASE_URL + \
            '/proxy/device-service/deviceservice/'

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
            'origin': 'https://sso.garmin.com' if not self.is_cn else 'https://sso.garmin.cn'
        }
Example #13
def cloudScraperRequest(url, method, **kwargs):
    headers = kwargs.pop('headers', {})
    cookies = kwargs.pop('cookies', {})
    params = kwargs.pop('params', {})

    scraper = cloudscraper.CloudScraper()
    if Prefs['captcha_enable']:
        scraper.captcha = {
            'provider': Prefs['captcha_type'],
            'api_key': Prefs['captcha_key']
        }
    scraper.headers.update(headers)
    scraper.cookies.update(cookies)

    req = scraper.request(method, url, data=params)

    return req
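
cloudscraper also accepts the captcha configuration at construction time; an equivalent sketch using the same Prefs values:

scraper = cloudscraper.create_scraper(captcha={
    'provider': Prefs['captcha_type'],
    'api_key': Prefs['captcha_key']
})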
Example #14
def bsGetBalanceAtTime(balance_date, address):
    if not w3.isAddress(address):
        raise KeyError('Invalid address!')

    #This conversion is needed while testing outside of django
    #balance_date = datetime.strptime(balance_date, '%Y-%m-%d')

    if balance_date > date.today():
        raise ValueError('Date is in the future!')
    dt = correctDateFormating(balance_date)
    scraper = cloudscraper.CloudScraper()  #Bypassing cloudflare

    #Populating required parameters for form submission
    rp = scraper.get('https://etherscan.io/balancecheck-tool')
    sp = bs(rp.text, 'html.parser')
    event_target = sp.find(id="__EVENTTARGET").get('value')
    event_argument = sp.find(id="__EVENTARGUMENT").get('value')
    view_state = sp.find(id="__VIEWSTATE").get('value')
    view_state_gen = sp.find(id="__VIEWSTATEGENERATOR").get('value')
    event_validation = sp.find(id="__EVENTVALIDATION").get('value')
    params = {
        '__EVENTTARGET': event_target,
        '__EVENTARGUMENT': event_argument,
        '__VIEWSTATE': view_state,
        '__VIEWSTATEGENERATOR': view_state_gen,
        '__EVENTVALIDATION': event_validation,
        'ctl00$ContentPlaceHolder1$txtAddress': address,
        'date': dt,
        'ctl00$ContentPlaceHolder1$txtBlockNo': '',
        'ctl00$ContentPlaceHolder1$Button1': 'Lookup'
    }
    #Scraping the response for Eth balance
    response = scraper.post('https://etherscan.io/balancecheck-tool',
                            data=params)
    soup = bs(response.text, 'html.parser')

    #Dump the response HTML for debugging
    with open('test.html', 'w') as f:
        f.write(response.text)

    balance = soup.find("span", {
        "class": "text-size-1 text-break"
    }).text.split()
    #Strip the unit text so the balance can be converted and stored in the database
    ether_balance = Decimal(balance[0].replace(',', ''))
    return ether_balance
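
The comma stripping above matters because Decimal rejects grouped digits; a quick check:

from decimal import Decimal
assert Decimal('12,345.678901'.replace(',', '')) == Decimal('12345.678901')
# Decimal('12,345.678901') itself would raise decimal.InvalidOperation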
Example #15
    def __init__(self, email, password, auth_domain):
        """
        Init module
        """
        self.email = email
        self.password = password
        self.req = httpx.AsyncClient(timeout=TIME_OUT)
        self.cf_req = cloudscraper.CloudScraper()
        self.URL_DICT = (GARMIN_CN_URL_DICT
                         if auth_domain and str(auth_domain).upper() == "CN"
                         else GARMIN_COM_URL_DICT)
        self.modern_url = self.URL_DICT.get("MODERN_URL")

        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
            "origin": self.URL_DICT.get("SSO_URL_ORIGIN"),
        }
Example #16
def manual_login():
    for retry in range(0, 2):

        s = cloudscraper.CloudScraper()
        r = s.get("https://www.udemy.com/join/signup-popup/", )
        soup = bs(r.text, "html5lib")

        csrf_token = soup.find("input",
                               {"name": "csrfmiddlewaretoken"})["value"]

        data = {
            "csrfmiddlewaretoken": csrf_token,
            "locale": "en_US",
            "email": "*****@*****.**",
            "password": "******",
        }

        s.headers.update(
            {"Referer": "https://www.udemy.com/join/signup-popup/"})
        try:
            r = s.post(
                "https://www.udemy.com/join/login-popup/?locale=en_US",
                data=data,
                allow_redirects=False,
            )
        except cloudscraper.exceptions.CloudflareChallengeError:
            continue
        if r.status_code == 302:
            return "", r.cookies["client_id"], r.cookies["access_token"], csrf_token
        else:
            soup = bs(r.content, "html5lib")
            txt = soup.find(
                "div",
                class_="alert alert-danger js-error-alert").string.strip()
            if txt[0] == "Y":
                return "Too many logins per hour try later", "", "", ""
            elif txt[0] == "T":
                return "Email or password incorrect", "", "", ""
            else:
                return txt, "", "", ""

    return "Cloudflare is blocking your requests try again after an hour", "", "", ""
Example #17
    def GetWebContent(self, bangou):  # XXX: improve
        link = "https://www.r18.com/common/search/searchword=" + bangou
        scraper = cloudscraper.CloudScraper()
        response = scraper.get(link)
        self.soup = BeautifulSoup(response.text, "html.parser")
        item_list = self.soup.select(".item-list")
        # print(item_list)
        if not item_list:
            return ""
        try:  # TODO: check try range
            for item in item_list:
                link = item.select_one('a')['href']
                # print(link)
                response = scraper.get(link)
                self.soup = BeautifulSoup(response.text, "html.parser")
                infos = self.soup.select_one(".product-details").dl
                print('infos', infos)
                # print('infos children', infos.children())

                self.infoDict = dict()
                key = ''
                value = ''
                for child in infos.children:
                    print(child)
                    if child.name == 'dt':
                        key = child.getText()
                    elif child.name == 'dd':
                        value = child.getText()

                    if key and value:
                        print(key, value)
                        self.infoDict[key.strip()] = value.strip()
                        key = ''
                        value = ''

                # if self.infoDict['ID'] and self.infoDict['ID'] == bangou:
                #     return link
        except Exception as ex:
            # TODO: get web content failed
            print(ex)
            return link

        return link
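
The dt/dd pairing loop above can also be written with two find_all passes; a compact sketch assuming the same infos element:

keys = [dt.get_text(strip=True) for dt in infos.find_all('dt')]
values = [dd.get_text(strip=True) for dd in infos.find_all('dd')]
info_dict = dict(zip(keys, values))

This assumes every <dt> is followed by exactly one <dd>, which the original loop relies on as well.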
Example #18
def getSession(pageNum):
    zoomInfoSessionFirefox = cloudscraper.CloudScraper(browser={
        'browser': 'firefox',
        'mobile': False,
        'platform': 'windows'
    })
    getURL = str(str(companyLink) + '?pageNum=' + str(pageNum))
    try:
        zoomInfoGetPage = zoomInfoSessionFirefox.get(getURL,
                                                     allow_redirects=True)
    except cloudscraper.exceptions.CloudflareChallengeError as e:
        print('\n[-] Error encountered, retrying request...\n')
        time.sleep(1)
        try:
            zoomInfoGetPage = zoomInfoSessionFirefox.get(getURL,
                                                         allow_redirects=True)
        except Exception:
            print('\n[-] Unrecoverable error, exiting...\n')
            return None
    return zoomInfoGetPage
Example #19
    def __init__(self):
        """Create a new Download class instance."""
        logger.debug("__init__")
        self.session = cloudscraper.CloudScraper()
        self.sso_rest_client = RestClient(
            self.session,
            'sso.garmin.com',
            'sso',
            aditional_headers=self.garmin_headers)
        self.modern_rest_client = RestClient(
            self.session,
            'connect.garmin.com',
            'modern',
            aditional_headers=self.garmin_headers)
        self.activity_service_rest_client = RestClient.inherit(
            self.modern_rest_client, "proxy/activity-service/activity")
        self.download_service_rest_client = RestClient.inherit(
            self.modern_rest_client, "proxy/download-service/files")
        self.gc_config = GarminConnectConfigManager()
        # Existing downloaded data will be redownloaded and overwritten
        # if it is within this number of days of now.
        self.download_days_overlap = 3
Example #20
	def __init__(self, *args, **kwargs):
		super().__init__(*args, **kwargs)

		self.wg = WebRequest.WebGetRobust(chromium_headless=False)

		# This is.... kind of horrible.
		self.wg.errorOutCount = 1

		# proxy = SocksProxy.ProxyLauncher([TwoCaptchaSolver.TWOCAPTCHA_IP])
		recaptcha_params = {
				'provider': 'anticaptcha',
				'api_key': settings["captcha"]["anti-captcha"]['api_key'],

				# 'proxy'       : proxy.get_wan_address(),
				# 'proxytype'   : "SOCKS5",
			}

		self.req = cloudscraper.CloudScraper(
				recaptcha = recaptcha_params,
			)

		self.req.headers.update(self.wg.browserHeaders)
Example #21
    def parse_page(self, url):
        # self.logger.info(f'Parsing started for: {url}')

        print(f'Parsing started for: {url}')

        # login and start the session
        scraper = cloudscraper.CloudScraper()
        self.login()
        scraper.post(url=self.login_url, data=self.credentials)

        # create the BeautifulSoup object
        soup = BeautifulSoup(
            scraper.get(url=url, headers=self.headers).content, 'html5lib')

        # identify the set and change to directory for it
        self.identify_set(soup)

        # find all the images linked to on the page
        urls = self.find_image_links(url, soup)

        if urls is not None:
            # download the images
            self.download_files_parallel(urls)
        else:
            print("no images found")

        # find all the videos linked to on the page
        urls = self.find_video_links(url, soup)

        if urls is not None:
            # download the videos
            super().download_files_parallel(urls)
        else:
            print("no videos found")

        # self.logger.info(f'Parsing complete for: {url}')
        print(f'Parsing complete for: {url}')
Example #22
# stock data
stock_code = []
stock_name = []
stock_listingDate = []
stock_shares = []
stock_listingBoard = []

# LQ45 stock data
lq45_code = []
lq45_name = []
lq45_listingDate = []
lq45_shares = []
lq45_listingBoard = []

# http client
http = cloudscraper.CloudScraper()

while True:
    # build the request URL
    link = f"https://idx.co.id/umbraco/Surface/StockData/GetSecuritiesStock?code=&sector=&board=&start={start}&length={length}"

    # send request
    result = http.get(link).text
    result = json.loads(result)

    # is the result empty?
    # if so, the list of listed
    # companies is exhausted
    if result["data"] == []:
        break
    else:
Example #23
    def __init__(self, email, password, is_cn=False, session_data=None):
        """Create a new class instance."""
        self.session_data = session_data

        self.username = email
        self.password = password
        self.is_cn = is_cn

        self.garmin_connect_base_url = "https://connect.garmin.com"
        self.garmin_connect_sso_url = "sso.garmin.com/sso"
        self.garmin_connect_modern_url = "connect.garmin.com/modern"
        self.garmin_connect_css_url = "https://static.garmincdn.com/com.garmin.connect/ui/css/gauth-custom-v1.2-min.css"

        if self.is_cn:
            self.garmin_connect_base_url = "https://connect.garmin.cn"
            self.garmin_connect_sso_url = "sso.garmin.cn/sso"
            self.garmin_connect_modern_url = "connect.garmin.cn/modern"
            self.garmin_connect_css_url = "https://static.garmincdn.cn/cn.garmin.connect/ui/css/gauth-custom-v1.2-min.css"

        self.garmin_connect_login_url = self.garmin_connect_base_url + "/en-US/signin"
        self.garmin_connect_sso_login = "******"

        self.garmin_connect_devices_url = (
            "proxy/device-service/deviceregistration/devices")
        self.garmin_connect_device_url = "proxy/device-service/deviceservice"
        self.garmin_connect_weight_url = "proxy/weight-service/weight/dateRange"
        self.garmin_connect_daily_summary_url = (
            "proxy/usersummary-service/usersummary/daily")
        self.garmin_connect_metrics_url = "proxy/metrics-service/metrics/maxmet/daily"
        self.garmin_connect_daily_hydration_url = (
            "proxy/usersummary-service/usersummary/hydration/daily")
        self.garmin_connect_personal_record_url = (
            "proxy/personalrecord-service/personalrecord/prs")
        self.garmin_connect_earned_badges_url = (
            "proxy/badge-service/badge/earned")
        self.garmin_connect_adhoc_challenges_url = (
            "proxy/adhocchallenge-service/adHocChallenge/historical")
        self.garmin_connect_badge_challenges_url = (
            "proxy/badgechallenge-service/badgeChallenge/completed")
        self.garmin_connect_daily_sleep_url = (
            "proxy/wellness-service/wellness/dailySleepData")
        self.garmin_connect_daily_stress_url = "proxy/wellness-service/wellness/dailyStress"

        self.garmin_connect_rhr = "proxy/userstats-service/wellness/daily"

        self.garmin_connect_user_summary_chart = (
            "proxy/wellness-service/wellness/dailySummaryChart")
        self.garmin_connect_heartrates_daily_url = (
            "proxy/wellness-service/wellness/dailyHeartRate")
        self.garmin_connect_daily_respiration_url = (
            "proxy/wellness-service/wellness/daily/respiration")
        self.garmin_connect_daily_spo2_url = (
            "proxy/wellness-service/wellness/daily/spo2")
        self.garmin_connect_activities = (
            "proxy/activitylist-service/activities/search/activities")
        self.garmin_connect_activity = "proxy/activity-service/activity"

        self.garmin_connect_fit_download = "proxy/download-service/files/activity"
        self.garmin_connect_tcx_download = "proxy/download-service/export/tcx/activity"
        self.garmin_connect_gpx_download = "proxy/download-service/export/gpx/activity"
        self.garmin_connect_kml_download = "proxy/download-service/export/kml/activity"
        self.garmin_connect_csv_download = "proxy/download-service/export/csv/activity"
        self.garmin_connect_gear = "proxy/gear-service/gear/filterGear"

        self.garmin_connect_logout = "auth/logout/?url="

        self.garmin_headers = {"NK": "NT"}

        self.session = cloudscraper.CloudScraper()
        self.sso_rest_client = ApiClient(
            self.session,
            self.garmin_connect_sso_url,
            aditional_headers=self.garmin_headers,
        )
        self.modern_rest_client = ApiClient(
            self.session,
            self.garmin_connect_modern_url,
            aditional_headers=self.garmin_headers,
        )

        self.display_name = None
        self.full_name = None
        self.unit_system = None
Example #24
    def test_js_challenge_21_05_2015(self, **kwargs):
        scraper = cloudscraper.CloudScraper(**kwargs)
        expect(scraper.get(url).content).to.equal(requested_page)
Example #25
from bs4 import BeautifulSoup
import requests, re
from time import sleep
import cloudscraper
import os.path

scraper = cloudscraper.CloudScraper(delay=5,
                                    browser={
                                        "browser": "chrome",
                                        "platform": "windows",
                                        "mobile": False,
                                        "desktop": True,
                                    })
base_url = "https://www.fanfiction.net/"

genres = ("Adventure", "Angst", "Crime", "Drama", "Family", "Fantasy",
          "Friendship", "General", "Horror", "Humor", "Hurt", "Mystery",
          "Parody", "Poetry", "Romancy", "Sci-Fi", "Spiritual", "Supernatural",
          "Suspense", "Tragedy", "Western")


class Scraper:
    def get_categories(self):
        categories = []

        html_url = scraper.get(base_url).text
        soup = BeautifulSoup(html_url, "lxml")

        find_categories_container = soup.find(class_="dropdown-menu")
        find_categories = find_categories_container.find_all("a")
Example #26
    def _get_session(self, record=None, email=None, password=None):
        session = cloudscraper.CloudScraper()

        # JSIG CAS, cool I guess.
        # Not quite OAuth though, so I'll continue to collect raw credentials.
        # Commented stuff left in case this ever breaks because of missing parameters...
        data = {
            'username': email,
            'password': password,
            '_eventId': 'submit',
            'embed': 'true',
            # 'displayNameRequired': 'false'
        }
        params = {
            'service': 'https://connect.garmin.com/modern',
            # 'redirectAfterAccountLoginUrl': 'http://connect.garmin.com/modern',
            # 'redirectAfterAccountCreationUrl': 'http://connect.garmin.com/modern',
            # 'webhost': 'olaxpw-connect00.garmin.com',
            'clientId': 'GarminConnect',
            'gauthHost': 'https://sso.garmin.com/sso',
            # 'rememberMeShown': 'true',
            # 'rememberMeChecked': 'false',
            'consumeServiceTicket': 'false',
            # 'id': 'gauth-widget',
            # 'embedWidget': 'false',
            # 'cssUrl': 'https://static.garmincdn.com/com.garmin.connect/ui/src-css/gauth-custom.css',
            # 'source': 'http://connect.garmin.com/en-US/signin',
            # 'createAccountShown': 'true',
            # 'openCreateAccount': 'false',
            # 'usernameShown': 'true',
            # 'displayNameShown': 'false',
            # 'initialFocus': 'true',
            # 'locale': 'en'
        }

        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
            'Referer': 'https://jhartman.pl',
            'origin': 'https://sso.garmin.com'
        }

        # I may never understand what motivates people to mangle a perfectly good protocol like HTTP in the ways they do...
        preResp = session.get('https://sso.garmin.com/sso/signin', params=params, headers=headers)
        if preResp.status_code != 200:
            raise APIException('SSO prestart error %s %s' % (preResp.status_code, preResp.text))

        ssoResp = session.post('https://sso.garmin.com/sso/login', params=params, data=data, allow_redirects=False, headers=headers)
        
        if ssoResp.status_code != 200 or 'temporarily unavailable' in ssoResp.text:
            raise APIException('SSO error %s %s' % (ssoResp.status_code, ssoResp.text))

        if '>sendEvent(\'FAIL\')' in ssoResp.text:
            raise APIException('Invalid login')
        
        if '>sendEvent(\'ACCOUNT_LOCKED\')' in ssoResp.text:
            raise APIException('Account Locked')

        if 'renewPassword' in ssoResp.text:
            raise APIException('Reset password')

        # self.print_cookies(cookies=session.cookies)

        # ...AND WE'RE NOT DONE YET!

        gcRedeemResp = session.get('https://connect.garmin.com/modern',
                                   allow_redirects=False,
                                   headers=headers)
        if gcRedeemResp.status_code != 302:
            raise APIException(f'GC redeem-start error {gcRedeemResp.status_code} {gcRedeemResp.text}')

        url_prefix = 'https://connect.garmin.com'

        # There are 6 redirects that need to be followed to get the correct cookie
        # ... :(
        max_redirect_count = 7
        current_redirect_count = 1
        while True:
            url = gcRedeemResp.headers['location']

            # Fix up relative redirects.
            if url.startswith('/'):
                url = url_prefix + url
            url_prefix = '/'.join(url.split('/')[:3])
            gcRedeemResp = session.get(url, allow_redirects=False)

            if (current_redirect_count >= max_redirect_count and
                gcRedeemResp.status_code != 200):
                raise APIException(f'GC redeem {current_redirect_count}/'
                                   f'{max_redirect_count} error '
                                   f'{gcRedeemResp.status_code} '
                                   f'{gcRedeemResp.text}')

            if gcRedeemResp.status_code in [200, 404]:
                break

            current_redirect_count += 1
            if current_redirect_count > max_redirect_count:
                break

        # self.print_cookies(session.cookies)

        session.headers.update(headers)

        return session
Example #27
if sys.version_info[0] > 2:
    from requests.exceptions import HTTPError, ConnectionError, Timeout, RequestException
else:
    from requests import HTTPError, ConnectionError, Timeout, RequestException

import copy, re
import xbmc, xbmcgui, xbmcvfs
from strings import *
from serviceLib import *
import cloudscraper

from contextlib import contextmanager

sess = cloudscraper.create_scraper()
scraper = cloudscraper.CloudScraper()

serviceName = 'playlist'

playlists = [
    'playlist_1', 'playlist_2', 'playlist_3', 'playlist_4', 'playlist_5'
]


class PlaylistUpdater(baseServiceUpdater):
    def __init__(self, instance_number):
        self.serviceName = serviceName + "_{}".format(instance_number)
        self.instance_number = str(instance_number)
        self.localMapFile = 'playlistmap.xml'
        baseServiceUpdater.__init__(self)
        self.servicePriority = int(
Example #28
import re
import enum
import time
from contextlib import suppress
# from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import cloudscraper
from fuzzywuzzy import fuzz, process

# constants
HEADERS = {
    "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
}
SITE_DOMAIN = "https://subscene.com"
request_session = cloudscraper.CloudScraper()


# utils
def soup_for(url):
    url = re.sub("\s", "+", url)
    # r = Request(url, data=None, headers=HEADERS)
    # html = urlopen(r).read().decode("utf-8")
    html = request_session.get(url, headers=HEADERS)
    return BeautifulSoup(html.content, "html.parser")


class AttrDict():
    def __init__(self, *attrs):
        self._attrs = attrs
Example #29
    def get_story_metadata(self):

        if re.search(URL_VALIDATE, self.BaseUrl):

            logger.info(f"Processing {self.BaseUrl}")

            self.scraper = cloudscraper.CloudScraper(delay=2,
                                                     browser={
                                                         'browser': 'chrome',
                                                         'platform': 'windows',
                                                         'mobile': False,
                                                         'desktop': True,
                                                     })

            response = self.scraper.get(self.BaseUrl)

            logger.debug(f"GET: {response.status_code}: {response.url}")
            ffn_soup = BeautifulSoup(response.content, 'html.parser')

            try:
                self.ffn_story_name = ffn_soup.find_all(
                    'b', 'xcontrast_txt')[0].string.strip()

            except IndexError:  # Story Not Found
                logger.error("ffn_story_name is missing.")
                self.ffn_story_name = None
                return

            self.ffn_story_id = (re.search(r"\d+", self.BaseUrl)).group(0)

            self.ffn_author_name = ffn_soup.find_all(
                'a', {'href': re.compile(r'^/u/\d+/.')})[0].string.strip()

            self.ffn_author_url = ffn_soup.find(
                'div', attrs={'id': 'profile_top'}).find('a', href=True)['href']

            self.ffn_author_id = (re.search(r"\d+",
                                            self.ffn_author_url)).group(0)

            try:
                self.ffn_story_summary = ffn_soup.find_all(
                    'div', {
                        'style': 'margin-top:2px',
                        'class': 'xcontrast_txt'
                    })[0].string.strip()

            except IndexError:  # Missing summary
                logger.error("ffn_story_summary is missing.")
                self.ffn_story_summary = ""

            self.ffn_story_fandom = ffn_soup.find(
                'span', attrs={'class': 'lc-left'}).find(
                    'a', attrs={'class': 'xcontrast_txt'}).text

            # if the fandom isn't a crossover, then go to the next <a>
            if not re.search(r"\bcrossover\b", self.ffn_story_fandom,
                             re.IGNORECASE):
                self.ffn_story_fandom = ffn_soup.find(
                    'span', attrs={'class': 'lc-left'}).find(
                        'a', attrs={'class': 'xcontrast_txt'}).findNext('a').text

            self.details = ffn_soup.find_all(
                'span', {'class': 'xgray xcontrast_txt'})[0].text.split(' - ')

            self.dates = [
                date for date in ffn_soup.find_all('span')
                if date.has_attr('data-xutime')
            ]

            for i in range(0, len(self.details)):

                if self.details[i].startswith('Updated:'):

                    self.ffn_story_status = "In-Progress"

                    self.ffn_story_last_updated = datetime.fromtimestamp(
                        int(self.dates[0]['data-xutime']))

                    self.ffn_story_published = datetime.fromtimestamp(
                        int(self.dates[1]['data-xutime']))  # Published date

                    # change formatting
                    self.ffn_story_last_updated = self.ffn_story_last_updated.strftime(
                        r'%Y-%m-%d')
                    self.ffn_story_published = self.ffn_story_published.strftime(
                        r'%Y-%m-%d')

                    break  # if found, exit the loop to prevent overwriting of the variable

                elif self.details[i].startswith('Published:'):

                    self.ffn_story_status = "Completed"

                    # if Updated not found, pub & last_up will be same
                    self.ffn_story_last_updated = str(
                        datetime.fromtimestamp(
                            int(self.dates[0]
                                ['data-xutime'])))  # Published date

                    self.ffn_story_published = str(
                        datetime.fromtimestamp(
                            int(self.dates[0]
                                ['data-xutime'])))  # Published date

                    # change formatting
                    self.ffn_story_last_updated = datetime.strptime(
                        self.ffn_story_last_updated, '%Y-%m-%d %H:%M:%S')

                    self.ffn_story_published = datetime.strptime(
                        self.ffn_story_published, '%Y-%m-%d %H:%M:%S')

                    self.ffn_story_last_updated = self.ffn_story_last_updated.strftime(
                        r'%-d %b, %Y ')
                    self.ffn_story_published = self.ffn_story_published.strftime(
                        r'%-d %b, %Y ')

            for i in range(0, len(self.details)):

                if self.details[i].startswith('Reviews:'):

                    self.ffn_story_reviews = self.details[i].replace(
                        'Reviews:', '').strip()

                    break  # if found, exit the loop to prevent overwriting of the variable

                else:
                    self.ffn_story_reviews = 'Not found'

            for i in range(0, len(self.details)):
                if self.details[i].startswith('Favs:'):

                    self.ffn_story_favs = self.details[i].replace('Favs:',
                                                                  '').strip()

                    break  # if found, exit the loop to prevent overwriting of the variable

                else:
                    self.ffn_story_favs = 'Not found'

            for i in range(0, len(self.details)):
                if self.details[i].startswith('Follows:'):

                    self.ffn_story_follows = self.details[i].replace(
                        'Follows:', '').strip()

                    break  # if found, exit the loop to prevent overwriting of the variable

                else:
                    self.ffn_story_follows = 'Not found'

            for i in range(0, len(self.details)):
                if self.details[i].startswith('Rated:'):

                    self.ffn_story_rating = self.details[i].replace(
                        'Rated:', '').strip()

                    break  # if found, exit the loop to prevent overwriting of the variable

                else:
                    self.ffn_story_rating = 'Not found'

            self.ffn_story_lang = self.details[1]
            self.ffn_story_genre = self.details[2]
            self.ffn_story_characters = self.details[3]

            if re.search(r'\d', str(self.ffn_story_characters)):
                self.ffn_story_characters = "Not Found"

            search = [x for x in self.details if x.startswith("Words:")]
            if len(search) == 0:
                self.ffn_story_length = 0
            else:
                self.ffn_story_length = int(search[0][len("Words:"):].replace(
                    ',', ''))

                self.ffn_story_length = "{:,}".format(
                    int(self.ffn_story_length))

            search = [x for x in self.details if x.startswith("Chapters:")]
            if len(search) == 0:
                self.ffn_story_chapters = 1  # 1 as the default chapter number
            else:
                self.ffn_story_chapters = str(
                    int(search[0][len("Chapters:"):].replace(',',
                                                             ''))).strip()

            self.ffn_author_url = "https://www.fanfiction.net" + self.ffn_author_url

            # remove everything after &sa from the BaseUrl
            if re.search(r"^(.*?)&", self.BaseUrl) is not None:
                self.BaseUrl = re.search(r"^(.*?)&", self.BaseUrl).group(1)

        else:
            logger.error("BaseUrl is invalid")
Example #30
    password = namespace.password

    if not credentials:
        if not username:
            username = input("Username:"******"Password:"******"the dir specified does not exists: %s", CONFIG_DIR)
        mkdir(CONFIG_DIR)

    if namespace.save:
        with open(path.join(CONFIG_DIR, "accounts.json"), 'w', encoding='utf-8') as f:
            json.dump(accountInfo, f, indent=4)
        exit(0)

    for username, password in credentials:
        crunchyrollGuestPassFinder = CrunchyrollGuestPassFinder(cloudscraper.CloudScraper())
        if crunchyrollGuestPassFinder.login(username, password) and not namespace.dry_run:
            logging.info("logged into %s", username)
            if crunchyrollGuestPassFinder.isAccountNonPremium():
                crunchyrollGuestPassFinder.findGuestPassAndActivateAccount()
            else:
                logging.info("Account '%s' is already premium", username)