def load(use_cache_server=True):
    """
    Collect browser data and return it as a validated dictionary.

    For every ``(browser, percent)`` pair produced by ``get_browsers()`` the
    browser name is normalised (``settings.REPLACEMENTS`` applied, then
    lower-cased) and its versions are fetched with
    ``get_browser_versions()``.  If anything in that step fails and
    ``use_cache_server`` is true, the JSON document served at
    ``settings.CACHE_SERVER`` is used instead.

    :param use_cache_server: when false, the original loading error is
        re-raised instead of falling back to the cache server.
    :returns: dict with non-empty dict values under the ``'browsers'`` and
        ``'randomize'`` keys.
    :raises FakeUserAgentError: if the cache server payload cannot be
        decoded, or the resulting data is missing/empty/not a dictionary.
    """
    versions_by_key = {}
    weighted_keys = {}

    try:
        for name, share in get_browsers():
            key = name
            for needle, substitute in settings.REPLACEMENTS.items():
                key = key.replace(needle, substitute)
            key = key.lower()

            versions_by_key[key] = get_browser_versions(name)

            # Every browser receives int(percent * 10) consecutive slots,
            # keyed by the running slot index as a string -- presumably so
            # a uniform pick over the values is weighted by usage share.
            # A plain {'chrome': <int>, 'firefox': <int>} mapping would be
            # a simpler representation if this is ever refactored.
            slot_count = int(float(share) * 10)
            for _ in range(slot_count):
                next_slot = str(len(weighted_keys))
                weighted_keys[next_slot] = key
    except Exception as exc:
        if not use_cache_server:
            raise exc

        logger.warning(
            'Error occurred during loading data. '
            'Trying to use cache server %s',
            settings.CACHE_SERVER,
            exc_info=exc,
        )

        try:
            cached_payload = get(settings.CACHE_SERVER)
            ret = json.loads(cached_payload.decode('utf-8'))
        except (TypeError, ValueError):
            raise FakeUserAgentError('Can not load data from cache server')
    else:
        ret = {'browsers': versions_by_key, 'randomize': weighted_keys}

    # The cache server content is external input, so the shape is checked
    # regardless of where the data came from.
    if not isinstance(ret, dict):
        raise FakeUserAgentError('Data is not dictionary ', ret)

    for section in ('browsers', 'randomize'):
        if section not in ret:
            raise FakeUserAgentError('Missing data param: ', section)

        payload = ret[section]

        if not isinstance(payload, dict):
            raise FakeUserAgentError('Data param is not dictionary', payload)

        if not payload:
            raise FakeUserAgentError('Data param is empty', payload)

    return ret
def __getattr__(self, attr):
    """
    Resolve an attribute name to a random user agent string.

    Names listed in ``self.safe_attrs`` are delegated to the parent class.
    Any other name is normalised (``settings.REPLACEMENTS`` applied, then
    lower-cased); ``'random'`` picks a browser from ``self.data_randomize``,
    everything else is looked up through ``settings.SHORTCUTS``.  A random
    entry of ``self.data_browsers[browser]`` is returned.

    :raises FakeUserAgentError: if the lookup fails with ``KeyError`` or
        ``IndexError`` and ``self.fallback`` is ``None``; otherwise the
        fallback is returned and a warning is logged.
    """
    if attr in self.safe_attrs:
        return super(UserAgent, self).__getattr__(attr)

    try:
        key = attr
        for needle, substitute in settings.REPLACEMENTS.items():
            key = key.replace(needle, substitute)
        key = key.lower()

        browser = (
            random.choice(self.data_randomize)
            if key == 'random'
            else settings.SHORTCUTS.get(key, key)
        )

        candidates = self.data_browsers[browser]
        return random.choice(candidates)
    except (KeyError, IndexError):
        if self.fallback is not None:
            logger.warning(
                'Error occurred during getting browser, '
                'but was suppressed with fallback.',
            )
            return self.fallback

        raise FakeUserAgentError('Error occurred during getting browser')
def get_browser_versions(browser, verify_ssl=True):
    """
    very very hardcoded/dirty re/split stuff, but no dependencies

    Download the page at ``settings.BROWSER_BASE_PAGE`` for ``browser`` and
    scrape the user agent strings listed inside its ``<div id='liste'>``
    element.

    :param browser: browser name; URL-quoted before being put in the URL.
    :param verify_ssl: forwarded to ``get()``.
    :returns: list of at most ``settings.BROWSERS_COUNT_LIMIT`` strings.
    :raises FakeUserAgentError: if no version string was found.
    :raises IndexError: if the page does not contain the expected
        ``<div id='liste'>`` marker.
    """
    html = get(
        settings.BROWSER_BASE_PAGE.format(browser=quote_plus(browser)),
        verify_ssl=verify_ssl,
    )
    html = html.decode('iso-8859-1')

    # Keep only the markup between the listing div and its closing tag.
    html = html.split('<div id=\'liste\'>')[1]
    html = html.split('</div>')[0]

    pattern = r'\?id=\d+\'>(.+?)</a'

    browsers = []

    # The loop variable must not be called ``browser``: that would shadow
    # the parameter and make the error message below report a regex match
    # object instead of the requested browser name.
    for match in re.finditer(pattern, html, re.UNICODE):
        version = match.group(1)

        # Links whose text contains "more" are skipped; they are not
        # collected as user agent strings.
        if 'more' in version.lower():
            continue

        browsers.append(version)

        if len(browsers) == settings.BROWSERS_COUNT_LIMIT:
            break

    if not browsers:
        raise FakeUserAgentError(
            'No browsers version found for {browser}'.format(browser=browser))

    return browsers
def get(url):
    """
    Fetch ``url`` and return the raw response body, retrying on failure.

    Each attempt uses ``settings.HTTP_TIMEOUT`` as the request timeout.
    After a failed attempt the function sleeps for ``settings.HTTP_TIMEOUT``
    seconds before trying again.

    :param url: address to fetch.
    :returns: the response body as returned by ``read()``.
    :raises FakeUserAgentError: once ``settings.HTTP_RETRIES`` attempts
        have failed with ``URLError`` or ``OSError``.
    """
    request = Request(url)

    attempt = 0

    while True:
        attempt += 1

        try:
            response = urlopen(request, timeout=settings.HTTP_TIMEOUT)
            try:
                return response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except (URLError, OSError) as exc:
            logger.debug(
                'Error occurred during fetching %s',
                url,
                exc_info=exc,
            )

            # ">=" rather than "==" so that a retry limit of zero or less
            # cannot turn this into an endless loop.
            if attempt >= settings.HTTP_RETRIES:
                raise FakeUserAgentError('Maximum amount of retries reached')
            else:
                logger.debug(
                    'Sleeping for %s seconds',
                    settings.HTTP_TIMEOUT,
                )
                sleep(settings.HTTP_TIMEOUT)
def get_ua(self, proxy=None):
    """
    Get a user agent for the configured type, optionally pinned to a proxy.

    ``self.ua_type`` is a sequence of keys that is walked through the nested
    ``self.uas`` structure.  If the value reached is a list, a random element
    is chosen; any other value is returned as is.

    :param proxy: when truthy, the chosen user agent is remembered in
        ``self.proxy2ua`` and returned again for the same proxy.
    :returns: the user agent, or ``self.fallback`` if the lookup failed and
        a fallback is set.
    :raises FakeUserAgentError: if the lookup failed and ``self.fallback``
        is ``None``.
    """
    if proxy and proxy in self.proxy2ua:
        return self.proxy2ua[proxy]

    uas = self.uas
    for key in self.ua_type:
        try:
            uas = uas[key]
        except (KeyError, IndexError, TypeError):
            # TypeError covers levels that cannot be subscripted with this
            # key (None, or a list/str indexed with a string).  Stop
            # descending: subscripting the failed result with the remaining
            # keys would otherwise raise an unhandled TypeError.
            uas = None
            break

    if uas is None:
        if self.fallback is None:
            raise FakeUserAgentError(
                'Error occurred during getting browser')
        else:
            logger.warning(
                'Error occurred during getting browser for type "%s", '
                'but was suppressed with fallback.',
                '.'.join(self.ua_type))
            return self.fallback

    ua = random.choice(uas) if isinstance(uas, list) else uas

    if proxy:
        self.proxy2ua[proxy] = ua

    return ua
def get(url, verify_ssl=True):
    """
    Fetch ``url`` and return the raw response body, retrying on failure.

    Every attempt first uses ``urlopen``.  If that fails with ``URLError``
    or ``OSError`` and the retry limit has not been reached, the same URL
    is tried once through ``requests`` before sleeping for
    ``settings.HTTP_DELAY`` seconds and starting the next attempt.

    :param url: address to fetch.
    :param verify_ssl: when false, certificate verification is disabled
        for both ``urlopen`` (if it supports an SSL context) and the
        ``requests`` fallback.
    :returns: the response body as bytes.
    :raises FakeUserAgentError: once ``settings.HTTP_RETRIES`` attempts
        have failed.
    """
    urlopen_kwargs = {'timeout': settings.HTTP_TIMEOUT}

    if urlopen_has_ssl_context:
        # context=None makes urlopen use its default behaviour.
        urlopen_kwargs['context'] = (
            None if verify_ssl else ssl._create_unverified_context()
        )
    # else: ssl context is not supported ;(

    attempt = 0

    while True:
        request = Request(url)

        attempt += 1

        try:
            with contextlib.closing(
                urlopen(request, **urlopen_kwargs)
            ) as response:
                return response.read()
        except (URLError, OSError) as exc:
            logger.debug(
                'Error occurred during fetching %s',
                url,
                exc_info=exc,
            )

            # ">=" rather than "==" so that a retry limit of zero or less
            # cannot turn this into an endless loop.
            if attempt >= settings.HTTP_RETRIES:
                raise FakeUserAgentError('Maximum amount of retries reached')

            # Best-effort second channel.  It honours the same timeout and
            # certificate policy as urlopen, and HTTP error responses are
            # rejected instead of having their body returned as data.
            try:
                fallback_response = requests.get(
                    url,
                    timeout=settings.HTTP_TIMEOUT,
                    verify=verify_ssl,
                )
                fallback_response.raise_for_status()
                return fallback_response.content
            except Exception as fallback_exc:
                # Deliberately not fatal, but no longer a bare "except"
                # that would also swallow KeyboardInterrupt/SystemExit.
                logger.debug(
                    'Error occurred during fallback fetching %s',
                    url,
                    exc_info=fallback_exc,
                )

            logger.debug(
                'Sleeping for %s seconds',
                settings.HTTP_DELAY,
            )
            sleep(settings.HTTP_DELAY)