Exemple #1
0
import collections
import gettext
import logging
import re

import wpull.util
from wpull.backport.logging import StyleAdapter
from wpull.document.html import HTMLReader
from wpull.document.htmlparse.element import Element
from wpull.document.util import detect_response_encoding
from wpull.pipeline.item import LinkType
from wpull.scraper.base import BaseHTMLScraper, ScrapeResult, LinkContext
from wpull.scraper.util import urljoin_safe, clean_link_soup, parse_refresh, \
    is_likely_inline, is_likely_link, is_unlikely_link, identify_link_type
from wpull.url import percent_decode

# Module-level logger named after this module, wrapped in the
# StyleAdapter provided by wpull's logging backport.
_logger = StyleAdapter(logging.getLogger(__name__))

# Conventional short alias for gettext's message lookup function.
_ = gettext.gettext


# Plain record type backing LinkInfo. The generated class is named
# 'LinkInfoType'; its fields are positional in the order listed here.
_BaseLinkInfo = collections.namedtuple(
    'LinkInfoType',
    [
        'element', 'tag', 'attrib', 'link',
        'inline', 'linked', 'base_link', 'value_type',
        'link_type'
    ]
)


class LinkInfo(_BaseLinkInfo):
    '''Named tuple of link information whose hash depends only on ``link``.

    Equality is not overridden, so two instances compare equal only when
    every field matches (regular tuple comparison). Instances that share
    the same ``link`` but differ in other fields therefore hash alike
    without being equal.
    '''
    def __hash__(self):
        '''Return the hash of the ``link`` field.

        Raises:
            TypeError: If ``link`` is not hashable.
        '''
        # Go through the built-in so the standard hashing protocol
        # applies, including the usual "unhashable type" error.
        return hash(self.link)
Exemple #2
0
'''Delegation to other processor.'''
import gettext
import logging

import asyncio

from wpull.backport.logging import StyleAdapter
from wpull.pipeline.session import ItemSession
from wpull.processor.base import BaseProcessor

# Module-level logger. It is named after this module (rather than using
# the unnamed root logger) so that emitted records identify where they
# came from and can be filtered per module. StyleAdapter comes from
# wpull's logging backport.
_logger = StyleAdapter(logging.getLogger(__name__))
# Conventional short alias for gettext's message lookup function.
_ = gettext.gettext


class DelegateProcessor(BaseProcessor):
    '''Delegate to Web or FTP processor.

    The processor to use is looked up in an internal mapping keyed by
    the URL scheme of the item being processed.
    '''
    def __init__(self):
        # Maps a URL scheme to the processor handling it; starts empty.
        self._processors = dict()

    @asyncio.coroutine
    def process(self, item_session: ItemSession):
        '''Hand the item to the processor stored for its URL scheme.

        Args:
            item_session: Session whose ``url_record.url_info.scheme``
                selects the processor.

        Returns:
            The result of the selected processor's ``process``, or
            ``None`` after a warning is logged when the mapping holds
            no usable processor for the scheme.

        Coroutine.
        '''
        url_scheme = item_session.url_record.url_info.scheme
        delegate = self._processors.get(url_scheme)

        if delegate:
            outcome = yield from delegate.process(item_session)
            return outcome
        else:
            message = _('No processor available to handle {scheme} scheme.')
            _logger.warning(message, scheme=repr(url_scheme))