def worker(payload):
    """Resolve the url carried by one payload and wrap the outcome.

    `payload` is unpacked as a 3-tuple `(http, item, url)`. When the url
    is None, nothing is resolved and a result with every field but `item`
    set to None is returned.

    `resolve_args`, `max_redirects`, `follow_refresh_header`,
    `follow_meta_refresh` and `follow_js_relocation` are not defined in
    this function; they are read from the surrounding scope.

    Returns:
        A ResolveWorkerResult built from the url, the item and the
        `(error, stack)` pair returned by `resolve`.
    """
    pool, item, target = payload

    # No url for this item: short-circuit without calling `resolve`.
    if target is None:
        return ResolveWorkerResult(url=None, item=item, error=None, stack=None)

    # Optional per-item keyword arguments forwarded to `resolve`.
    if resolve_args is not None:
        extra = resolve_args(target, item)
    else:
        extra = {}

    error, stack = resolve(
        pool,
        target,
        max_redirects=max_redirects,
        follow_refresh_header=follow_refresh_header,
        follow_meta_refresh=follow_meta_refresh,
        follow_js_relocation=follow_js_relocation,
        **extra
    )

    return ResolveWorkerResult(url=target, item=item, error=error, stack=stack)
# 'https://www.google.com/url?q=https://www.facebook.com/Contaniunamenos/&sa=D&ust=1603455678482000&usg=AFQjCNFSANkezX4k8Fk4sY6xg30u6CHO2Q', # Invalid URL 'http://www.outremersbeyou.com/talent-de-la-semaine-la-designer-comorienne-aisha-wadaane-je-suis-fiere-de-mes-origines/', # Refresh header 'http://la-grange.net/2015/03/26/refresh/', # GET & UA nonsense 'https://ebay.us/BUkuxU', # Incorrect refresh header 'http://ow.ly/csT350v7mRc', # Utf-8 location header 'http://ow.ly/2awz50v1JkO', 'http://xfru.it/v2uFaC', # IP Host redirect 'https://bit.ly/2ANzJNW' ] http = create_pool() for url in URLS: print() error, stack = resolve(http, url, follow_meta_refresh=True) print(error) for item in stack: print(item)
# # GET & UA nonsense # 'https://ebay.us/BUkuxU', # # Incorrect refresh header # 'http://ow.ly/csT350v7mRc', # # Utf-8 location header # 'http://ow.ly/2awz50v1JkO', # 'http://xfru.it/v2uFaC', # # IP Host redirect # 'https://bit.ly/2ANzJNW', # Inference # 'https://test.com?url=http%3A%2F%2Flemonde.fr%3Fnext%3Dhttp%253A%252F%252Ftarget.fr', # 'http://lemonde.fr?url=http%3A%2F%2Flemonde.fr', 'https://www.ohaime-passion.com/fil-info/11291-soutien-total-aux-supporters-interpellees.html' ] http = create_pool() for url in URLS: print() error, stack = resolve(http, url, follow_meta_refresh=True, infer_redirection=True) print(type(error), error) for item in stack: print(item)