Example #1
0
    def worker(payload):
        """Resolve the url carried by a single payload and wrap the outcome.

        `payload` is unpacked as `(http, item, url)`.

        When `url` is None no resolution is attempted and the result carries
        None for its url, error and stack. Otherwise `resolve` is called with
        the redirection options (free variables defined outside this
        function) plus whatever keyword arguments `resolve_args(url, item)`
        yields when that callback is not None.

        Returns a `ResolveWorkerResult` built from url, item, error and stack.
        """
        http, item, url = payload

        if url is not None:
            # Per-url keyword arguments, only when a callback was provided.
            extra = {} if resolve_args is None else resolve_args(url, item)

            error, stack = resolve(
                http,
                url,
                max_redirects=max_redirects,
                follow_refresh_header=follow_refresh_header,
                follow_meta_refresh=follow_meta_refresh,
                follow_js_relocation=follow_js_relocation,
                **extra
            )
        else:
            # Nothing to resolve: report an empty outcome for this item.
            error = stack = None

        return ResolveWorkerResult(
            url=url,
            item=item,
            error=error,
            stack=stack
        )
Example #2
0
    # 'https://www.google.com/url?q=https://www.facebook.com/Contaniunamenos/&sa=D&ust=1603455678482000&usg=AFQjCNFSANkezX4k8Fk4sY6xg30u6CHO2Q',

    # Invalid URL
    'http://www.outremersbeyou.com/talent-de-la-semaine-la-designer-comorienne-aisha-wadaane-je-suis-fiere-de-mes-origines/',

    # Refresh header
    'http://la-grange.net/2015/03/26/refresh/',

    # GET & UA nonsense
    'https://ebay.us/BUkuxU',

    # Incorrect refresh header
    'http://ow.ly/csT350v7mRc',

    # Utf-8 location header
    'http://ow.ly/2awz50v1JkO',
    'http://xfru.it/v2uFaC',

    # IP Host redirect
    'https://bit.ly/2ANzJNW'
]

# A single pool object is created once and passed to every resolve call.
http = create_pool()

for url in URLS:
    # Blank line separating this url's output from the previous one.
    print()

    result = resolve(http, url, follow_meta_refresh=True)
    error, stack = result

    # The error slot first, then every entry of the stack on its own line.
    print(error)

    for item in stack:
        print(item)
Example #3
0
    # # GET & UA nonsense
    # 'https://ebay.us/BUkuxU',

    # # Incorrect refresh header
    # 'http://ow.ly/csT350v7mRc',

    # # Utf-8 location header
    # 'http://ow.ly/2awz50v1JkO',
    # 'http://xfru.it/v2uFaC',

    # # IP Host redirect
    # 'https://bit.ly/2ANzJNW',

    # Inference
    # 'https://test.com?url=http%3A%2F%2Flemonde.fr%3Fnext%3Dhttp%253A%252F%252Ftarget.fr',
    # 'http://lemonde.fr?url=http%3A%2F%2Flemonde.fr',
    'https://www.ohaime-passion.com/fil-info/11291-soutien-total-aux-supporters-interpellees.html'
]

# A single pool object is created once and passed to every resolve call.
http = create_pool()

for url in URLS:
    # Blank line separating this url's output from the previous one.
    print()

    result = resolve(
        http,
        url,
        follow_meta_refresh=True,
        infer_redirection=True
    )
    error, stack = result

    # Show the error together with its type, then each entry of the stack.
    print(type(error), error)

    for item in stack:
        print(item)