def query_user_page(url, retry=10, timeout=60):
    """
    Returns the scraped user data from a twitter user page.

    Every attempt takes the next proxy from ``proxy_pool`` and downloads the
    page with ``requests.get``. When the request raises ``HTTPError``,
    ``ConnectionError`` or ``Timeout`` the error is logged and the download is
    attempted again, up to ``retry`` additional times.

    :param url: The URL to get the twitter user info from (url contains the user page)
    :param retry: Number of retries if something goes wrong.
    :param timeout: Value handed to ``requests.get`` as its ``timeout``
                    argument on every attempt, so a stalled connection is
                    aborted instead of blocking forever.
    :return: Returns the scraped user data from a twitter user page, or None
             when the page yields no user data or every attempt failed.
    """
    attempts_left = retry
    # Loop instead of recursing so that each retry runs this same code path
    # with the caller's timeout, independent of what the module-level name
    # happens to be bound to.
    while True:
        try:
            proxy = next(proxy_pool)
            logger.info('Using proxy {}'.format(proxy))
            # NOTE(review): only the "http" scheme is routed through the
            # proxy; confirm whether https URLs should be proxied as well.
            response = requests.get(
                url,
                headers=HEADER,
                proxies={"http": proxy},
                timeout=timeout,
            )
            html = response.text or ''
            user_info = User.from_html(html)
            # A falsy parse result is normalised to None.
            return user_info or None
        # NOTE(review): raise_for_status() is never called, so HTTPError is
        # presumably not raised for 4xx/5xx responses here; verify intent.
        except requests.exceptions.HTTPError as e:
            logger.exception('HTTPError {} while requesting "{}"'.format(e, url))
        except requests.exceptions.ConnectionError as e:
            logger.exception('ConnectionError {} while requesting "{}"'.format(
                e, url))
        except requests.exceptions.Timeout as e:
            logger.exception('TimeOut {} while requesting "{}"'.format(e, url))

        if attempts_left <= 0:
            break
        logger.info('Retrying... (Attempts left: {})'.format(attempts_left))
        attempts_left -= 1

    logger.error('Giving up.')
    return None
def query_user_page(url, retry=10):
    """
    Download a twitter user page and scrape the user data out of it.

    A request that raises ``HTTPError``, ``ConnectionError`` or ``Timeout`` is
    logged and then repeated until the retry budget is used up.

    :param url: Address of the twitter user page to download.
    :param retry: How many additional attempts are made after a failed request.
    :return: The scraped user data, or None when nothing was scraped or every
             attempt failed.
    """
    remaining = retry
    while True:
        try:
            page = requests.get(url, headers=HEADER)
            markup = page.text or ""
            scraper = User()
            scraped = scraper.from_html(markup)
            # A falsy scrape result is reported as None.
            return scraped if scraped else None
        except requests.exceptions.HTTPError as err:
            logger.exception('HTTPError {} while requesting "{}"'.format(err, url))
        except requests.exceptions.ConnectionError as err:
            logger.exception(
                'ConnectionError {} while requesting "{}"'.format(err, url)
            )
        except requests.exceptions.Timeout as err:
            logger.exception('TimeOut {} while requesting "{}"'.format(err, url))

        # Only reached when one of the handled request errors occurred.
        if remaining <= 0:
            logger.error("Giving up.")
            return None

        logger.info("Retrying... (Attempts left: {})".format(remaining))
        remaining -= 1
def query_user_page(url, retry=10):
    """
    Scrape the user data found on a twitter user page.

    The page is fetched with ``requests.get`` and parsed through
    ``User().from_html``. If the request raises ``HTTPError``,
    ``ConnectionError`` or ``Timeout`` the error is logged and the function
    calls itself again with one retry less.

    :param url: URL of the twitter user page to scrape.
    :param retry: Number of further attempts allowed after a failure.
    :return: The scraped user data, or None if nothing was parsed or no
             attempts are left.
    """
    try:
        reply = requests.get(url, headers=HEADER)
        page_source = reply.text or ''
        parser = User()
        parsed = parser.from_html(page_source)
        # Collapse any falsy parse result to None.
        return parsed or None
    except requests.exceptions.HTTPError as exc:
        message = 'HTTPError {} while requesting "{}"'.format(exc, url)
        logger.exception(message)
    except requests.exceptions.ConnectionError as exc:
        message = 'ConnectionError {} while requesting "{}"'.format(exc, url)
        logger.exception(message)
    except requests.exceptions.Timeout as exc:
        message = 'TimeOut {} while requesting "{}"'.format(exc, url)
        logger.exception(message)

    # Reaching this point means the request failed with a handled error.
    if retry <= 0:
        logger.error('Giving up.')
        return None

    logger.info('Retrying... (Attempts left: {})'.format(retry))
    return query_user_page(url, retry - 1)