Example #1
0
    def parse_emails(self, response):
        """Yield an ``EmailItem`` for each email found in ``response``.

        The response body is passed through ``remove_tags`` with
        ``['script', 'style']`` and the result is scanned by
        ``extract_emails``.

        Args:
            response: Object exposing ``url``, ``body`` and a ``meta``
                mapping that holds a ``'depth'`` entry.

        Yields:
            One ``EmailItem`` per value produced by ``extract_emails``,
            built from a mapping keyed by ``PageUrl``, ``Depth`` and
            ``Email``. Values are yielded exactly as produced; no
            de-duplication happens here.

        Raises:
            KeyError: On first iteration, when ``response.meta`` is a
                plain mapping without a ``'depth'`` key.
        """
        url = response.url
        depth = response.meta['depth']

        # NOTE(review): presumably this keeps addresses that only appear
        # inside script/style markup out of the results -- confirm against
        # the remove_tags implementation.
        markup = remove_tags(response.body, ['script', 'style'])

        # Parse the cleaned page data for email addresses.
        for email in extract_emails(markup):
            yield EmailItem({PageUrl: url, Depth: depth, Email: email})
Example #2
0
def test_remove_tags_raises_error():
    """Check that ``remove_tags`` rejects a bare string tag argument.

    The full-page fixture is passed together with ``'script'`` as a plain
    string, and the call is expected to raise ``ValueError``.
    """
    page_markup = full_page.FULL_PAGE

    # NOTE(review): presumably the tags argument must be a list rather
    # than a single string -- confirm against remove_tags.
    with pytest.raises(ValueError):
        remove_tags(page_markup, 'script')