예제 #1
0
    def __init__(self, url):
        """Initialise the parser with a base *url* and an empty link set.

        :param url: base URL as a string; a trailing ``/`` is appended when
                    it is missing (an empty string becomes ``'/'``).
        """
        HTMLParser.__init__(self)

        # ``endswith`` copes with an empty string, where indexing ``url[-1]``
        # would raise IndexError.
        if not url.endswith('/'):
            url += '/'
        self.__url = url
        self.links = set()
예제 #2
0
    def __init__(self, *args, **kwargs):
        """Initialise the HTMLParser base, then the next class in the MRO.

        All positional and keyword arguments are forwarded unchanged to the
        ``super()`` initialiser; none of them reach ``HTMLParser``.
        """
        # ``convert_charrefs`` is only passed on interpreters newer than
        # (3, 4), where it is switched off explicitly.
        parser_options = {}
        if sys.version_info > (3,4):  #pragma: no cover
            parser_options['convert_charrefs'] = False
        HTMLParser.__init__(self, **parser_options)

        super(HTMLRewriter, self).__init__(*args, **kwargs)
예제 #3
0
    def __init__(self):
        """Set up the HTMLParser base and start with an empty output string."""
        # ``convert_charrefs`` is requested only when ``is_py3()`` is true.
        parser_kwargs = {'convert_charrefs': True} if is_py3() else {}
        HTMLParser.__init__(self, **parser_kwargs)

        self._output = ''
예제 #4
0
파일: toc.py 프로젝트: AlexPerrot/mkdocs
    def __init__(self):
        """Create a parser with no collected links and no anchor in progress."""
        HTMLParser.__init__(self)

        # State describing the anchor currently being read.
        self.title = ''
        self.attrs = None
        self.in_anchor = False

        self.links = []
예제 #5
0
    def __init__(self, *args, **kwargs):
        """Run ``HTMLParser.__init__`` and then the cooperative ``super()`` chain.

        ``*args`` and ``**kwargs`` go only to the ``super()`` call.
        """
        if sys.version_info > (3,4):  #pragma: no cover
            # Newer parsers accept ``convert_charrefs``; turn it off.
            charref_kwargs = dict(convert_charrefs=False)
        else:  #pragma: no cover
            charref_kwargs = {}
        HTMLParser.__init__(self, **charref_kwargs)

        super(HTMLRewriter, self).__init__(*args, **kwargs)
예제 #6
0
 def __init__(self, encoding='iso8859-1'):
     """Initialise parser state; *encoding* is stored on ``self.encoding``."""
     HTMLParser.__init__(self)
     # Empty containers first, then the counters/flags.
     self.__data = []
     self.tagstack = []
     self.inbody = 0
     self.checkflag = 0  # Are we in a tag we check?
     self.encoding = encoding
예제 #7
0
    def __init__(self, url, session=None, authentication=None, timeout=None):
        """Fetch *url* and feed the response text to this parser.

        :param url: url of the directory on the web server.
        :param session: a requests Session instance used to fetch the directory
                        content. If None, a new session will be created.
        :param authentication: a tuple (username, password) to authenticate against
                               the web server, or None for no authentication. Note
                               that it will only be used if the given *session* is
                               None.
        :param timeout: timeout in seconds used when fetching the directory
                        content.
        """
        if session:
            self.session = session
        else:
            # No usable session supplied: create one carrying the credentials.
            self.session = requests.Session()
            self.session.auth = authentication
        self.timeout = timeout

        self.active_url = None
        self.entries = []

        HTMLParser.__init__(self)

        # Force the server to not send cached content
        response = self.session.get(
            url,
            headers={'Cache-Control': 'max-age=0'},
            timeout=self.timeout,
        )

        # The response is closed whether or not the status check or the
        # parsing raises.
        try:
            response.raise_for_status()
            self.feed(response.text)
        finally:
            response.close()
예제 #8
0
    def __init__(self, styled, styles=None):
        """Start with an empty output string and an empty style stack.

        :param styled: stored unchanged on ``self.styled``.
        :param styles: style table to use; any falsy value selects
                       ``default_styles`` instead.
        """
        HTMLParser.__init__(self)

        self.style_stack = []
        self.styles = styles or default_styles

        self.styled = styled
        self.s = ''
예제 #9
0
파일: styling.py 프로젝트: schwa/punic
    def __init__(self, style, styles=None):
        """Start with an empty output string and an empty style stack.

        :param style: stored unchanged on ``self.style``.
        :param styles: style table to use; any falsy value selects
                       ``default_styles`` instead.
        """
        HTMLParser.__init__(self)

        self.style_stack = []
        self.styles = styles or default_styles

        self.style = style
        self.s = ''
예제 #10
0
 def __init__(self, allows=None):
     """Initialise empty result buffers and choose the allowed tags.

     :param allows: replacement for ``self.allow_tags``; ``None`` or any
         other falsy value keeps the ``allow_tags`` already visible on
         the instance.
     """
     HTMLParser.__init__(self)
     self.allow_tags = allows or self.allow_tags
     self.data = []
     self.start = []
     self.result = []
예제 #11
0
파일: utils.py 프로젝트: joetboole/pelican
    def __init__(self, max_words):
        """Reset the word counter and remember *max_words*.

        :param max_words: stored unchanged on ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        HTMLParser.__init__(self)

        self.truncate_at = None
        self.open_tags = []
        self.words_found = 0
        self.max_words = max_words
예제 #12
0
 def __init__(self):
     """Initialise the parser with cleared counters and an empty text map."""
     HTMLParser.__init__(self)
     self.depthText = {}  # path:text
     # Counters all start at zero.
     self._depth = 0
     self.counting = 0
     self.lastN = 0
     # Nothing is being ignored and no tag has been seen yet.
     self._ignore = False
     self._ignorePath = None
     self._lasttag = None
예제 #13
0
 def __init__(self, _file, search_tag):
     """Initialise the parser and remember the file and tag to search.

     :param _file: stored unchanged on ``self.file``.
     :param search_tag: stored unchanged on ``self.search_tag``.
     """
     if not six.PY3:
         # HTMLParser is not a new-style class in py2
         HTMLParser.__init__(self)
     else:
         super(TemplateParser, self).__init__()
     self.parsed_data = []
     self.file = _file
     self.search_tag = search_tag
예제 #14
0
    def __init__(self):
        """Initialise the parser with no pending values and an empty buffer."""
        HTMLParser.__init__(self)

        self.read_buffer = six.StringIO()
        self.in_tag = False

        # No name or value has been captured yet.
        self.new_value = None
        self.original_value = None
        self.text_name = None
예제 #15
0
 def __init__(self):
     """Start with nothing ignored, zeroed counters and no stored text."""
     HTMLParser.__init__(self)
     # Ignore state.
     self._ignorePath = None
     self._ignore = False
     # Tag position tracking.
     self._lasttag = None
     self._depth = 0
     # Collected text and counters.
     self.lastN = 0
     self.counting = 0
     self.depthText = {} # path:text
예제 #16
0
    def __init__(self, media_locator, link_handler):
        """Build the three rule handlers and the initial buffer and stack.

        :param media_locator: passed to ``StartRules`` and ``StartEndRules``.
        :param link_handler: passed to ``StartRules`` and ``StartEndRules``.
        """
        HTMLParser.__init__(self)
        rule_args = (media_locator, link_handler)
        self.handlers_start = StartRules(*rule_args)
        self.handlers_startend = StartEndRules(*rule_args)
        self.handlers_end = EndRules()

        self.new_buffer()
        # The stack begins with a single empty list on it.
        self.stack = deque([[]])
    def __init__(self):
        """Create the parser with cleared capture fields and a fresh buffer."""
        HTMLParser.__init__(self)

        # Captured name and values are unset until parsing fills them in.
        self.original_value = None
        self.new_value = None
        self.text_name = None

        self.read_buffer = six.StringIO()
        self.in_tag = False
예제 #18
0
    def __init__(self, max_words):
        """Store the word limit and clear the counting state.

        :param max_words: stored unchanged on ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        HTMLParser.__init__(self)

        self.max_words = max_words
        # Nothing counted, nothing open, no truncation point yet.
        self.truncate_at = None
        self.words_found = 0
        self.open_tags = []
예제 #19
0
    def __init__(self, styled):
        """Initialise the output string, style table and style stack.

        :param styled: stored unchanged on ``self.styled``.
        """
        HTMLParser.__init__(self)

        self.s = ''
        self.styled = styled

        # Style name -> terminal attribute taken from ``MyHTMLParser.term``.
        term = MyHTMLParser.term
        self.styles = {
            'err': term.red,
            'ref': term.yellow,
            'rev': term.bold,
            'cmd': term.cyan + self.term.underline,
            # 'sub': term.cyan,
            'echo': term.yellow,
        }

        self.style_stack = []
예제 #20
0
	def __init__(self, skip_tags=None, debugger=None):
		"""Initialise an empty parse state.

		:param skip_tags: stored on ``self._skip_tags``; when omitted (or
			``None``) a new empty list is created for this instance.
		:param debugger: stored on ``self._hpd``; when ``None`` a
			``HtmlParserDebugger(debug=False)`` is created instead.
		"""
		self._root = None
		self._stack = []
		# A ``[]`` default in the signature is a single list object shared by
		# every parser built without *skip_tags*; build a fresh one instead.
		self._skip_tags = skip_tags if skip_tags is not None else []
		self._skip = False, None
		self._hpd = debugger if debugger is not None else HtmlParserDebugger(debug=False)

		# ``convert_charrefs`` is requested only when ``is_py3()`` is true.
		if is_py3():
			HTMLParser.__init__(self, convert_charrefs=True)
		else:
			HTMLParser.__init__(self)
예제 #21
0
파일: utils.py 프로젝트: cltrudeau/wrench
    def __init__(self, *args, **kwargs):
        """Initialise the base parser and clear the capture state.

        All arguments are forwarded unchanged to the base initialiser.
        """
        if sys.version_info[0] < 3:  # pragma: no cover
            # HTMLParser is still an old style object and so super doesn't
            # work
            HTMLParser.__init__(self, *args, **kwargs)
        else:
            super(AnchorParser, self).__init__(*args, **kwargs)

        self.text = ''
        self.url = ''
        self.capture = 0
예제 #22
0
파일: utils.py 프로젝트: cltrudeau/wrench
    def __init__(self, *args, **kwargs):
        """Forward every argument to the base initialiser, then reset fields."""
        on_py3 = sys.version_info > (3,)
        if not on_py3:   # pragma: no cover
            # HTMLParser is still an old style object and so super doesn't
            # work
            HTMLParser.__init__(self, *args, **kwargs)
        else:
            super(AnchorParser, self).__init__(*args, **kwargs)

        # Nothing captured yet.
        self.capture = 0
        self.text = ''
        self.url = ''
 def __init__(self):
     """Set the default encoding and build the handler lookup table."""
     HTMLParser.__init__(self)
     self._encoding = 'ISO-8859-1'
     # The ``th_*`` keys reuse the ``td_*`` callbacks.
     self._handlers = dict(
         table_start=self.table_start,
         table_end=self.table_end,
         tr_start=self.tr_start,
         tr_end=self.tr_end,
         td_start=self.td_start,
         td_end=self.td_end,
         th_start=self.td_start,
         th_end=self.td_end,
         br_start=self.br_start,
         meta_start=self.meta_start,
     )
예제 #24
0
    def __init__(self, tag="a", attr="href", process=None, unique=False):
        """Emit the deprecation warning and set up the scan predicates.

        :param tag: a callable used as-is, or a value compared with ``==``.
        :param attr: a callable used as-is, or a value compared with ``==``.
        :param process: a callable applied to values; anything non-callable
                        is replaced by an identity function.
        :param unique: stored unchanged on ``self.unique``.
        """
        HTMLParser.__init__(self)

        warnings.warn(
            "HtmlParserLinkExtractor is deprecated and will be removed in "
            "future releases. Please use scrapy.linkextractors.LinkExtractor",
            ScrapyDeprecationWarning, stacklevel=2,
        )

        # Callables are kept untouched; plain values become equality tests.
        if callable(tag):
            self.scan_tag = tag
        else:
            self.scan_tag = lambda t: t == tag

        if callable(attr):
            self.scan_attr = attr
        else:
            self.scan_attr = lambda a: a == attr

        if callable(process):
            self.process_attr = process
        else:
            self.process_attr = lambda v: v

        self.unique = unique
예제 #25
0
    def __init__(self, max_words):
        """Store the word limit and clear the counting state.

        :param max_words: stored unchanged on ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        try:
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # pre Python 3.3
            HTMLParser.__init__(self)

        self.truncate_at = None
        self.last_word_end = None
        self.open_tags = []
        self.words_found = 0
        self.max_words = max_words
예제 #26
0
    def __init__(self, tag="a", attr="href", process=None, unique=False):
        """Warn that the extractor is deprecated, then configure matching.

        :param tag: callable predicate, or a value matched with ``==``.
        :param attr: callable predicate, or a value matched with ``==``.
        :param process: callable applied to values; a non-callable is
                        replaced by an identity function.
        :param unique: stored unchanged on ``self.unique``.
        """
        HTMLParser.__init__(self)

        warnings.warn(
            "HtmlParserLinkExtractor is deprecated and will be removed in "
            "future releases. Please use scrapy.linkextractors.LinkExtractor",
            ScrapyDeprecationWarning,
            stacklevel=2,
        )

        self.unique = unique

        # Defaults used when the caller did not supply a callable.
        self.scan_tag = lambda t: t == tag
        self.scan_attr = lambda a: a == attr
        self.process_attr = lambda v: v

        if callable(tag):
            self.scan_tag = tag
        if callable(attr):
            self.scan_attr = attr
        if callable(process):
            self.process_attr = process
예제 #27
0
파일: utils.py 프로젝트: 52M/pelican
    def __init__(self, max_words):
        """Remember *max_words* and reset all truncation bookkeeping."""
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        try:
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # pre Python 3.3
            HTMLParser.__init__(self)

        self.max_words = max_words
        # Positions are unknown until words have been seen.
        self.last_word_end = None
        self.truncate_at = None
        self.words_found = 0
        self.open_tags = []
예제 #28
0
 def __init__(self):
     """Set the default encoding and map event names to bound callbacks."""
     HTMLParser.__init__(self)
     self._encoding = 'ISO-8859-1'
     # (event key, name of the method that serves it); ``th`` events are
     # served by the ``td`` methods.
     routes = (
         ('table_start', 'table_start'),
         ('table_end', 'table_end'),
         ('tr_start', 'tr_start'),
         ('tr_end', 'tr_end'),
         ('td_start', 'td_start'),
         ('td_end', 'td_end'),
         ('th_start', 'td_start'),
         ('th_end', 'td_end'),
         ('br_start', 'br_start'),
         ('meta_start', 'meta_start'),
     )
     self._handlers = {key: getattr(self, method) for key, method in routes}
예제 #29
0
파일: styling.py 프로젝트: Photonomie/punic
    def __init__(self, styled):
        """Initialise the output string, style table and style stack.

        :param styled: stored unchanged on ``self.styled``.
        """
        HTMLParser.__init__(self)

        self.s = ''
        self.styled = styled

        # Style name -> terminal attribute, filled in one entry at a time.
        table = {}
        table['err'] = MyHTMLParser.term.red
        table['ref'] = MyHTMLParser.term.yellow
        table['rev'] = MyHTMLParser.term.bold
        table['cmd'] = MyHTMLParser.term.cyan + self.term.underline
        # 'sub': term.cyan,
        table['echo'] = MyHTMLParser.term.yellow
        self.styles = table

        self.style_stack = []
예제 #30
0
        def __init__(self, settings, filename):
            """Initialise empty results and the position flags.

            :param settings: stored unchanged on ``self.settings``.
            :param filename: stored unchanged on ``self._filename``.
            """
            try:
                # Python 3.4+
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                HTMLParser.__init__(self)

            self.settings = settings
            self._filename = filename

            # Collected output starts empty.
            self.body = ''
            self.metadata = {}
            self._data_buffer = ''

            # Only the top-level flag starts out set.
            self._in_top_level = True
            self._in_head = self._in_title = False
            self._in_body = self._in_tags = False
예제 #31
0
        def __init__(self, settings, filename):
            """Prepare the parser: store the inputs and clear all state.

            :param settings: stored unchanged on ``self.settings``.
            :param filename: stored unchanged on ``self._filename``.
            """
            try:
                # Python 3.4+
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                HTMLParser.__init__(self)

            # Position flags.
            self._in_tags = False
            self._in_body = False
            self._in_title = False
            self._in_head = False
            self._in_top_level = True

            self._filename = filename
            self._data_buffer = ''

            # Inputs and collected results.
            self.settings = settings
            self.metadata = {}
            self.body = ''
예제 #32
0
 def __init__(self, typogrify, html_doc):
     """Parse *html_doc* immediately, recording where each line starts.

     :param typogrify: stored on ``self.typogrify`` before parsing begins.
     :param html_doc: HTML source; surrounding whitespace is stripped
         before it is stored on ``self.html_doc`` and fed to the parser.
     """
     self.html_doc = html_doc.strip()
     try:
         # Python 3.4+
         HTMLParser.__init__(self, convert_charrefs=False)
     except TypeError:
         HTMLParser.__init__(self)

     # Map of 1-based line number -> offset of that line's first character
     # in ``html_doc``.  It is created here, per instance, so the
     # constructor does not rely on a mapping that already exists elsewhere
     # (which would be shared between parsers, or missing altogether).
     self.new_line_pos = {1: 0}
     line_no = 1
     for index, char in enumerate(self.html_doc):
         if char == "\n":
             line_no += 1
             # The next line starts one character past the newline.
             self.new_line_pos[line_no] = index + 1

     self.typogrify = typogrify
     self.feed(self.html_doc)  # start parsing
예제 #33
0
    def __init__(self, search_anchor):
        """Remember the anchor to look for; nothing has been found yet.

        :param search_anchor: stored unchanged on ``self.search_anchor``.
        """
        HTMLParser.__init__(self)

        self.found = False
        self.search_anchor = search_anchor
예제 #34
0
 def __init__(self):
     """Create an empty ``metadata`` dict, then initialise the base parser."""
     # ``metadata`` is assigned before the base initialiser runs.
     self.metadata = {}
     HTMLParser.__init__(self)
예제 #35
0
    def __init__(self, search_anchor):
        """Initialise the parser with the anchor name to search for."""
        HTMLParser.__init__(self)

        # ``found`` starts out False.
        self.found = False
        self.search_anchor = search_anchor
예제 #36
0
    def __init__(self, search_anchor):
        # type: (unicode) -> None
        """Store *search_anchor* and mark it as not yet found."""
        HTMLParser.__init__(self)

        self.found = False
        self.search_anchor = search_anchor
예제 #37
0
 def __init__(self):
     """Create an empty ``matched_urls`` list, then initialise the base parser."""
     # The list is assigned before the base initialiser runs.
     self.matched_urls = []
     HTMLParser.__init__(self)
예제 #38
0
 def __init__(self):
     """Initialise the parser with an empty title and no match."""
     HTMLParser.__init__(self)
     self.title = ''
     self.match = False
예제 #39
0
 def __init__(self, pattern):
     """Store *pattern* and start with an empty ``items`` list.

     :param pattern: stored unchanged on ``self.pattern``.
     """
     HTMLParser.__init__(self)
     self.pattern = pattern
     self.items = []
예제 #40
0
 def __init__(self):
     """Initialise and reset the parser, then create an empty ``fed`` list."""
     HTMLParser.__init__(self)
     # Explicit reset, kept in its original position ahead of ``fed``.
     self.reset()
     self.fed = []
예제 #41
0
    def __init__(self, search_anchor):
        # type: (unicode) -> None
        """Remember which anchor to search for; ``found`` starts False."""
        HTMLParser.__init__(self)

        # Search result first, then the search target.
        self.found = False
        self.search_anchor = search_anchor
예제 #42
0
 def __init__(self, trans, render_embed_html_fn):
     """Store the two collaborators and clear the ignore state.

     :param trans: stored unchanged on ``self.trans``.
     :param render_embed_html_fn: stored unchanged on
         ``self.render_embed_html_fn``.
     """
     HTMLParser.__init__(self)
     self.render_embed_html_fn = render_embed_html_fn
     self.trans = trans
     # Content is not being ignored and no ignored tags are open.
     self.num_open_tags_for_ignore = 0
     self.ignore_content = False
예제 #43
0
 def __init__(self, url, out_dir):
     """Store *url* and *out_dir* on the freshly initialised parser.

     :param url: stored unchanged on ``self.url``.
     :param out_dir: stored unchanged on ``self.out_dir``.
     """
     HTMLParser.__init__(self)
     self.out_dir = out_dir
     self.url = url
예제 #44
0
 def __init__(self):
     """Start with an empty ``metadata`` dict and an initialised base parser."""
     # Assigned first, before ``HTMLParser.__init__`` is called.
     self.metadata = {}
     HTMLParser.__init__(self)
예제 #45
0
 def __init__(self, url, out_dir):
     """Initialise the parser and keep *url* and *out_dir* for later use."""
     HTMLParser.__init__(self)
     self.out_dir = out_dir
     self.url = url
예제 #46
0
 def __init__(self):
     """Initialise the parser with an empty ``text`` list."""
     HTMLParser.__init__(self)
     # Starts empty.
     self.text = []
예제 #47
0
 def __init__(self, target_tag):
     """Store *target_tag* and clear the current tag and its content.

     :param target_tag: stored unchanged on ``self.target_tag``.
     """
     # Cannot use super() because HTMLParser is an old-style class in Python2
     HTMLParser.__init__(self)
     self.tag_content = ""
     self.cur_tag = None
     self.target_tag = target_tag
 def __init__(self, log):
     # type: (logging.Logger) -> None
     """Store the logger; no license link is known yet."""
     HTMLParser.__init__(self)  # old style class
     self.link_to_license = None  # type: Optional[str]
     self.log = log
예제 #49
0
 def __init__(self):
     """Initialise the parser with an empty ``links`` list."""
     HTMLParser.__init__(self)
     # Starts empty.
     self.links = []
예제 #50
0
 def __init__(self, trans, render_embed_html_fn):
     """Keep *trans* and *render_embed_html_fn*; start with nothing ignored."""
     HTMLParser.__init__(self)
     # Ignore bookkeeping.
     self.ignore_content = False
     self.num_open_tags_for_ignore = 0
     # Collaborators supplied by the caller.
     self.render_embed_html_fn = render_embed_html_fn
     self.trans = trans
예제 #51
0
 def __init__(self):
     """Initialise the underlying HTMLParser; no extra state is added."""
     HTMLParser.__init__(self)
예제 #52
0
 def __init__(self):
     """Initialise the parser with ``ppage`` unset."""
     HTMLParser.__init__(self)
     # ``None`` until a value is assigned elsewhere.
     self.ppage = None
예제 #53
0
 def __init__(self, target_tag):
     """Remember *target_tag*; no tag is current and no content is held."""
     # Cannot use super() because HTMLParser is an old-style class in Python2
     HTMLParser.__init__(self)
     self.cur_tag = None
     self.tag_content = ""
     self.target_tag = target_tag
예제 #54
0
 def __init__(self, pattern):
     """Keep *pattern* on the instance and start with no items."""
     HTMLParser.__init__(self)
     self.pattern = pattern
     self.items = []
예제 #55
0
 def __init__(self):
     """Initialise the parser with an empty ``result`` list."""
     HTMLParser.__init__(self)
     # Starts empty.
     self.result = []