def query_user_page(url, retry=10):
    """
    Fetch a twitter user page and return the user data parsed from it.

    :param url: The URL of the twitter user page to request.
    :param retry: Number of additional attempts made after a failed request.
    :return: The value produced by ``User().from_html`` for the page, or
             None when that value is falsy or every attempt failed.
    """
    attempts_left = retry
    while True:
        try:
            page = requests.get(url, headers=HEADER).text or ""
            parsed = User().from_html(page)
            # Normalise any falsy parse result to None.
            return parsed if parsed else None
        except requests.exceptions.HTTPError as e:
            logger.exception('HTTPError {} while requesting "{}"'.format(e, url))
        except requests.exceptions.ConnectionError as e:
            logger.exception(
                'ConnectionError {} while requesting "{}"'.format(e, url))
        except requests.exceptions.Timeout as e:
            logger.exception('TimeOut {} while requesting "{}"'.format(e, url))

        # Only reached when one of the handled request errors was logged.
        if attempts_left > 0:
            logger.info(
                "Retrying... (Attempts left: {})".format(attempts_left))
            attempts_left = attempts_left - 1
            continue

        logger.error("Giving up.")
        return None
def query_user_page(url, retry=10, timeout=60):
    """
    Returns the scraped user data from a twitter user page.

    Each attempt takes the next proxy from ``proxy_pool``, so a retry does
    not reuse the proxy of the failed attempt unless the pool yields it again.

    :param url: The URL to get the twitter user info from (url contains the
                user page)
    :param retry: Number of retries if something goes wrong.
    :param timeout: Timeout, in seconds, handed to ``requests.get`` for every
                    attempt.
    :return: The user data parsed by ``User.from_html``, or None when the
             parse result is falsy or all attempts failed.
    """
    attempts_left = retry
    while True:
        try:
            proxy = next(proxy_pool)
            logger.info('Using proxy {}'.format(proxy))
            # NOTE(review): only the "http" scheme is mapped to the proxy;
            # https URLs would presumably not go through it - confirm intent.
            response = requests.get(
                url,
                headers=HEADER,
                proxies={"http": proxy},
                # Previously accepted but never forwarded, so a stalled
                # connection could block forever.
                timeout=timeout,
            )
            html = response.text or ''
            user_info = User.from_html(html)
            if not user_info:
                return None
            return user_info
        except requests.exceptions.HTTPError as e:
            logger.exception('HTTPError {} while requesting "{}"'.format(e, url))
        except requests.exceptions.ConnectionError as e:
            logger.exception(
                'ConnectionError {} while requesting "{}"'.format(e, url))
        except requests.exceptions.Timeout as e:
            logger.exception('TimeOut {} while requesting "{}"'.format(e, url))

        # Only reached after a handled request error. Looping (instead of the
        # former recursive call, which dropped ``timeout``) keeps the caller's
        # timeout in force for every retry.
        if attempts_left > 0:
            logger.info(
                'Retrying... (Attempts left: {})'.format(attempts_left))
            attempts_left -= 1
            continue

        logger.error('Giving up.')
        return None
def query_user_page(url, retry=10):
    """
    Request a twitter user page and hand back the user data scraped from it.

    :param url: Address of the twitter user page to download.
    :param retry: How many more times to try again after a request error.
    :return: The result of ``User().from_html`` on the page body, or None if
             that result is falsy or no attempt succeeded.
    """
    budget = retry
    while True:
        try:
            body = requests.get(url, headers=HEADER).text or ''
            scraped = User().from_html(body)
            if scraped:
                return scraped
            return None
        except requests.exceptions.HTTPError as err:
            message = 'HTTPError {} while requesting "{}"'.format(err, url)
            logger.exception(message)
        except requests.exceptions.ConnectionError as err:
            message = 'ConnectionError {} while requesting "{}"'.format(
                err, url)
            logger.exception(message)
        except requests.exceptions.Timeout as err:
            message = 'TimeOut {} while requesting "{}"'.format(err, url)
            logger.exception(message)

        # Execution gets here only after a handled request failure.
        if not budget > 0:
            logger.error('Giving up.')
            return None

        logger.info('Retrying... (Attempts left: {})'.format(budget))
        budget = budget - 1
from pprint import pprint

from twitterscraper.user import User

# Scratch script exploring dict comprehensions over the attributes of a
# default-constructed User.

user = User()

# The first `if` (the conditional expression) decides what to emit and has
# both a "then" and an "else" branch.
# The second `if` (the comprehension filter) only filters; it has no "else".

# NOTE(review): `filter` shadows the builtin of the same name; the name is kept
# so the script's module-level names stay unchanged.
filter = 'location'
change = 'Seguidores'
twitter_user = '******'

# Rename the 'followers' key, mask the 'user' value, and drop the key named
# by `filter`.
ejemplo = {
    (change if key == 'followers' else key):
        (twitter_user if key == 'user' else value)
    for key, value in User().__dict__.items()
    if key != filter
}

# Same attributes, with only the 'user' value masked.
user_info = {
    key: (twitter_user if key == 'user' else value)
    for key, value in User().__dict__.items()
}

print(type(user_info))
pprint(user.__dict__)
pprint(user_info)
pprint(ejemplo)

# Every inner value must be a dict: the nested comprehension below calls
# .items() on it. The original wrote {"bye", "adios"} (a set literal), which
# raised AttributeError.
outer_dict = {
    "outer_hello": {"hello": "hola"},
    "outer_bye": {"bye": "adios"},
}


def myfunc(message):
    """Return *message* unchanged (placeholder transformation)."""
    return message


# Apply myfunc to every inner value while keeping both levels of keys.
data = {
    outer_k: {inner_k: myfunc(inner_v) for inner_k, inner_v in outer_v.items()}
    for outer_k, outer_v in outer_dict.items()
}