def __init__(self, url):
    """Initialise the parser for a base URL and start with no links.

    :param url: base URL string. A trailing ``/`` is appended when it is
        missing; an empty string is accepted and becomes ``'/'``.
    """
    HTMLParser.__init__(self)
    # ``endswith`` instead of ``url[-1]`` so an empty URL does not raise
    # IndexError.
    if not url.endswith('/'):
        url += '/'
    self.__url = url
    self.links = set()
def __init__(self, *args, **kwargs):
    """Initialise the HTMLParser base, then continue along the MRO.

    ``convert_charrefs=False`` is passed to ``HTMLParser.__init__`` only on
    interpreters whose version compares greater than ``(3, 4)``. All
    positional and keyword arguments are forwarded to the initialiser that
    follows ``HTMLRewriter`` in the MRO.
    """
    parser_options = {}
    if sys.version_info > (3,4): #pragma: no cover
        parser_options['convert_charrefs'] = False
    HTMLParser.__init__(self, **parser_options)
    super(HTMLRewriter, self).__init__(*args, **kwargs)
def __init__(self):
    """Initialise the base parser and start with an empty output string.

    ``convert_charrefs=True`` is passed only when ``is_py3()`` is true.
    """
    if not is_py3():
        HTMLParser.__init__(self)
    else:
        HTMLParser.__init__(self, convert_charrefs=True)
    self._output = ''
def __init__(self):
    """Initialise the base parser and reset link, anchor, attrs and title state."""
    HTMLParser.__init__(self)
    # Independent pieces of initial state; nothing has been parsed yet.
    self.title = ''
    self.attrs = None
    self.in_anchor = False
    self.links = []
def __init__(self, encoding='iso8859-1'):
    """Initialise the base parser and the per-instance bookkeeping.

    :param encoding: value stored unchanged as ``self.encoding``.
    """
    HTMLParser.__init__(self)
    self.encoding = encoding
    self.tagstack = []
    self.inbody = 0
    self.checkflag = 0  # Are we in a tag we check?
    self.__data = []
def __init__(self, url, session=None, authentication=None, timeout=None):
    """Create a directory parser and immediately fetch and parse *url*.

    :param url: url of the directory on the web server.
    :param session: a requests Session instance used to fetch the directory
                    content. When falsy, a new session is created.
    :param authentication: a tuple (username, password) to authenticate
                           against the web server, or None for no
                           authentication. It is only applied to a session
                           created here, never to a *session* passed in.
    :param timeout: timeout in seconds used when fetching the directory
                    content.
    """
    if not session:
        session = requests.Session()
        session.auth = authentication
    self.session = session
    self.timeout = timeout

    self.active_url = None
    self.entries = []

    HTMLParser.__init__(self)

    # Force the server to not send cached content
    response = self.session.get(
        url,
        headers={'Cache-Control': 'max-age=0'},
        timeout=self.timeout,
    )
    try:
        response.raise_for_status()
        self.feed(response.text)
    finally:
        # Close the response whether or not the status check or parsing failed.
        response.close()
def __init__(self, styled, styles=None):
    """Initialise the base parser and the styling state.

    :param styled: value stored unchanged as ``self.styled``.
    :param styles: stored as ``self.styles``; any falsy value selects
        ``default_styles`` instead.
    """
    HTMLParser.__init__(self)
    self.s = ''
    self.styled = styled
    self.styles = styles or default_styles
    self.style_stack = []
def __init__(self, style, styles = None):
    """Initialise the base parser and the styling state.

    :param style: value stored unchanged as ``self.style``.
    :param styles: stored as ``self.styles``; any falsy value selects
        ``default_styles`` instead.
    """
    HTMLParser.__init__(self)
    self.s = ''
    self.style = style
    self.styles = styles or default_styles
    self.style_stack = []
def __init__(self, allows=None):
    """Initialise the base parser and the collection buffers.

    :param allows: replacement for ``self.allow_tags``. ``None`` or any
        other falsy value keeps the ``allow_tags`` already reachable on
        ``self``.
    """
    HTMLParser.__init__(self)
    # None and an empty sequence are both falsy, so both fall back.
    self.allow_tags = allows or self.allow_tags
    self.result = []
    self.start = []
    self.data = []
def __init__(self, max_words):
    """Initialise the base parser and the word-counting state.

    :param max_words: value stored unchanged as ``self.max_words``.
    """
    # In Python 2, HTMLParser is not a new-style class,
    # hence super() cannot be used.
    HTMLParser.__init__(self)
    self.max_words = max_words
    # Nothing counted, opened or truncated yet.
    self.truncate_at = None
    self.open_tags = []
    self.words_found = 0
def __init__(self):
    """Initialise the base parser and zero all tracking state."""
    HTMLParser.__init__(self)
    # Ignore / tag tracking.
    self._ignore = False
    self._ignorePath = None
    self._lasttag = None
    # Depth tracking and counters.
    self._depth = 0
    self.depthText = {}  # path:text
    self.counting = 0
    self.lastN = 0
def __init__(self, _file, search_tag):
    """Initialise the base parser and remember the file and tag to search.

    :param _file: value stored unchanged as ``self.file``.
    :param search_tag: value stored unchanged as ``self.search_tag``.
    """
    if not six.PY3:
        # HTMLParser is not a new-style class in py2
        HTMLParser.__init__(self)
    else:
        super(TemplateParser, self).__init__()
    self.file = _file
    self.search_tag = search_tag
    self.parsed_data = []
def __init__(self):
    """Initialise the base parser, clear the value slots and open a buffer."""
    HTMLParser.__init__(self)
    self.in_tag = False
    # The three value slots all start unset.
    self.text_name = None
    self.original_value = None
    self.new_value = None
    self.read_buffer = six.StringIO()
def __init__(self, media_locator, link_handler):
    """Initialise the base parser, the rule sets, the buffer and the stack.

    :param media_locator: passed to ``StartRules`` and ``StartEndRules``.
    :param link_handler: passed to ``StartRules`` and ``StartEndRules``.
    """
    HTMLParser.__init__(self)
    rule_args = (media_locator, link_handler)
    self.handlers_start = StartRules(*rule_args)
    self.handlers_startend = StartEndRules(*rule_args)
    self.handlers_end = EndRules()
    self.new_buffer()
    # The stack starts with a single empty list on it.
    self.stack = deque([[]])
def __init__(self, styled):
    """Initialise the base parser and build the name-to-style table.

    :param styled: value stored unchanged as ``self.styled``.
    """
    HTMLParser.__init__(self)
    self.s = ''
    self.styled = styled
    term = MyHTMLParser.term
    self.styles = {
        'err': term.red,
        'ref': term.yellow,
        'rev': term.bold,
        # The underline part is read through ``self.term``, as before.
        'cmd': term.cyan + self.term.underline,
        # 'sub': term.cyan,
        'echo': term.yellow,
    }
    self.style_stack = []
def __init__(self, skip_tags=None, debugger=None):
    """Initialise the tree-building state, then the HTMLParser base.

    :param skip_tags: stored as ``self._skip_tags``. When omitted or
        ``None``, a new empty list is created for this instance.
    :param debugger: stored as ``self._hpd``. When ``None``, a
        ``HtmlParserDebugger(debug=False)`` is created instead.
    """
    self._root = None
    self._stack = []
    # A ``[]`` default is evaluated once and shared by every instance that
    # omits the argument; build a fresh list per instance instead.
    self._skip_tags = [] if skip_tags is None else skip_tags
    self._skip = False, None
    if debugger is None:
        debugger = HtmlParserDebugger(debug=False)
    self._hpd = debugger
    # The base initialiser runs last, after the state above exists.
    if is_py3():
        HTMLParser.__init__(self, convert_charrefs=True)
    else:
        HTMLParser.__init__(self)
def __init__(self, *args, **kwargs):
    """Initialise the base parser and clear the capture state.

    All positional and keyword arguments are forwarded to the base
    initialiser.
    """
    if sys.version_info <= (3, ):  # pragma: no cover
        # HTMLParser is still an old style object and so super doesn't
        # work
        HTMLParser.__init__(self, *args, **kwargs)
    else:
        super(AnchorParser, self).__init__(*args, **kwargs)
    self.text = ''
    self.url = ''
    self.capture = 0
def __init__(self, *args, **kwargs):
    """Initialise the base parser and clear the capture state.

    All positional and keyword arguments are forwarded to the base
    initialiser.
    """
    on_python3 = sys.version_info > (3,)
    if not on_python3:  # pragma: no cover
        # HTMLParser is still an old style object and so super doesn't
        # work
        HTMLParser.__init__(self, *args, **kwargs)
    else:
        super(AnchorParser, self).__init__(*args, **kwargs)
    self.capture = 0
    self.text = ''
    self.url = ''
def __init__(self):
    """Initialise the base parser, the encoding and the handler table."""
    HTMLParser.__init__(self)
    self._encoding = 'ISO-8859-1'
    # Keys are '<tag>_<start|end>'; the th entries reuse the td handlers.
    self._handlers = dict(
        table_start=self.table_start,
        table_end=self.table_end,
        tr_start=self.tr_start,
        tr_end=self.tr_end,
        td_start=self.td_start,
        td_end=self.td_end,
        th_start=self.td_start,
        th_end=self.td_end,
        br_start=self.br_start,
        meta_start=self.meta_start,
    )
def __init__(self, tag="a", attr="href", process=None, unique=False):
    """Initialise the base parser, warn about deprecation, store matchers.

    :param tag: a callable used as ``self.scan_tag``, or a value that tags
        are compared against for equality.
    :param attr: a callable used as ``self.scan_attr``, or a value that
        attribute names are compared against for equality.
    :param process: a callable used as ``self.process_attr``; any
        non-callable value selects an identity function.
    :param unique: value stored unchanged as ``self.unique``.
    """
    HTMLParser.__init__(self)

    message = (
        "HtmlParserLinkExtractor is deprecated and will be removed in "
        "future releases. Please use scrapy.linkextractors.LinkExtractor"
    )
    # stacklevel=2 attributes the warning to the code constructing the object.
    warnings.warn(message, ScrapyDeprecationWarning, stacklevel=2)

    if callable(tag):
        self.scan_tag = tag
    else:
        self.scan_tag = lambda t: t == tag

    if callable(attr):
        self.scan_attr = attr
    else:
        self.scan_attr = lambda a: a == attr

    if callable(process):
        self.process_attr = process
    else:
        self.process_attr = lambda v: v

    self.unique = unique
def __init__(self, max_words):
    """Initialise the base parser and the word-counting state.

    :param max_words: value stored unchanged as ``self.max_words``.
    """
    # In Python 2, HTMLParser is not a new-style class,
    # hence super() cannot be used.
    try:
        HTMLParser.__init__(self, convert_charrefs=False)
    except TypeError:
        # pre Python 3.3
        HTMLParser.__init__(self)

    self.max_words = max_words
    # Nothing counted, opened or truncated yet.
    self.truncate_at = None
    self.last_word_end = None
    self.open_tags = []
    self.words_found = 0
def __init__(self):
    """Initialise the base parser, the encoding and the handler table."""
    HTMLParser.__init__(self)
    self._encoding = 'ISO-8859-1'
    # (key, handler) pairs; the th entries reuse the td handlers.
    handler_pairs = [
        ('table_start', self.table_start),
        ('table_end', self.table_end),
        ('tr_start', self.tr_start),
        ('tr_end', self.tr_end),
        ('td_start', self.td_start),
        ('td_end', self.td_end),
        ('th_start', self.td_start),
        ('th_end', self.td_end),
        ('br_start', self.br_start),
        ('meta_start', self.meta_start),
    ]
    self._handlers = dict(handler_pairs)
def __init__(self, styled):
    """Initialise the base parser and build the name-to-style table.

    :param styled: value stored unchanged as ``self.styled``.
    """
    HTMLParser.__init__(self)
    self.s = ''
    self.styled = styled
    class_term = MyHTMLParser.term
    # The underline part of 'cmd' is read through ``self.term``, as before.
    command_style = class_term.cyan + self.term.underline
    self.styles = dict(
        err=class_term.red,
        ref=class_term.yellow,
        rev=class_term.bold,
        cmd=command_style,
        # 'sub': term.cyan,
        echo=class_term.yellow,
    )
    self.style_stack = []
def __init__(self, settings, filename):
    """Initialise the base parser and the document-reading state.

    :param settings: value stored unchanged as ``self.settings``.
    :param filename: value stored unchanged as ``self._filename``.
    """
    try:
        # Python 3.4+
        HTMLParser.__init__(self, convert_charrefs=False)
    except TypeError:
        HTMLParser.__init__(self)

    # Caller-supplied context.
    self.settings = settings
    self._filename = filename

    # Accumulated output.
    self.body = ''
    self.metadata = {}
    self._data_buffer = ''

    # Position flags: only the top level is active at the start.
    self._in_top_level = True
    self._in_head = False
    self._in_title = False
    self._in_body = False
    self._in_tags = False
def __init__(self, typogrify, html_doc):
    """Index the line starts of *html_doc*, then parse it immediately.

    :param typogrify: value stored unchanged as ``self.typogrify``.
    :param html_doc: document text; it is stripped of surrounding
        whitespace, stored as ``self.html_doc`` and fed to the parser.
    """
    self.html_doc = html_doc.strip()
    try:
        # Python 3.4+
        HTMLParser.__init__(self, convert_charrefs=False)
    except TypeError:
        HTMLParser.__init__(self)

    # Mark the new line positions - needed to
    # determine the position within the input string
    #
    # ACTUALLY - we should use StringIO here instead
    newline_offsets = (
        offset for offset, char in enumerate(self.html_doc) if char == "\n"
    )
    self.new_line_pos[1] = 0
    for line_no, offset in enumerate(newline_offsets, start=2):
        # Add one due to index being zero based
        self.new_line_pos[line_no] = offset + 1

    self.typogrify = typogrify
    self.feed(self.html_doc)  # start parsing
def __init__(self, search_anchor):
    """Initialise the base parser and remember the anchor to search for.

    :param search_anchor: value stored unchanged as ``self.search_anchor``.
    """
    HTMLParser.__init__(self)
    # Starts out as not found.
    self.found = False
    self.search_anchor = search_anchor
def __init__(self):
    """Create an empty metadata mapping, then initialise the base parser."""
    # Assigned before the base initialiser runs; the original order is kept.
    self.metadata = {}
    HTMLParser.__init__(self)
def __init__(self, search_anchor):
    # type: (unicode) -> None
    """Initialise the base parser and remember the anchor to search for.

    :param search_anchor: value stored unchanged as ``self.search_anchor``.
    """
    HTMLParser.__init__(self)

    # Starts out as not found.
    self.found = False
    self.search_anchor = search_anchor
def __init__(self):
    """Create an empty matched-URL list, then initialise the base parser."""
    # Assigned before the base initialiser runs; the original order is kept.
    self.matched_urls = []
    HTMLParser.__init__(self)
def __init__(self):
    """Initialise the base parser with no match and an empty title."""
    HTMLParser.__init__(self)
    self.title = ''
    self.match = False
def __init__(self, pattern):
    """Initialise the base parser with an empty item list.

    :param pattern: value stored unchanged as ``self.pattern``.
    """
    HTMLParser.__init__(self)
    self.pattern = pattern
    self.items = []
def __init__(self):
    """Initialise the base parser, reset it, and start with nothing fed."""
    HTMLParser.__init__(self)
    # reset() is invoked explicitly after the base initialiser, as before.
    self.reset()

    self.fed = []
def __init__(self, trans, render_embed_html_fn):
    """Initialise the base parser and the content-ignoring state.

    :param trans: value stored unchanged as ``self.trans``.
    :param render_embed_html_fn: value stored unchanged as
        ``self.render_embed_html_fn``.
    """
    HTMLParser.__init__(self)

    # Caller-supplied collaborators.
    self.trans = trans
    self.render_embed_html_fn = render_embed_html_fn

    # Nothing is being ignored at the start.
    self.ignore_content = False
    self.num_open_tags_for_ignore = 0
def __init__(self, url, out_dir):
    """Initialise the base parser and remember the URL and output directory.

    :param url: value stored unchanged as ``self.url``.
    :param out_dir: value stored unchanged as ``self.out_dir``.
    """
    HTMLParser.__init__(self)
    self.out_dir = out_dir
    self.url = url
def __init__(self,url,out_dir):
    """Initialise the base parser and remember the URL and output directory.

    :param url: value stored unchanged as ``self.url``.
    :param out_dir: value stored unchanged as ``self.out_dir``.
    """
    HTMLParser.__init__(self)

    self.out_dir = out_dir
    self.url = url
def __init__(self):
    """Initialise the base parser with an empty ``text`` list."""
    HTMLParser.__init__(self)

    # Starts empty.
    self.text = []
def __init__(self, target_tag):
    """Initialise the base parser and the tag-capture state.

    :param target_tag: value stored unchanged as ``self.target_tag``.
    """
    # Cannot use super() because HTMLParser is an old-style class in Python2
    HTMLParser.__init__(self)
    self.target_tag = target_tag
    # No current tag and no captured content yet.
    self.tag_content = ""
    self.cur_tag = None
def __init__(self, log):
    # type: (logging.Logger) -> None
    """Initialise the base parser with no license link recorded.

    :param log: value stored unchanged as ``self.log``.
    """
    HTMLParser.__init__(self)  # old style class

    self.link_to_license = None  # type: Optional[str]
    self.log = log
def __init__(self):
    """Initialise the base parser with an empty ``links`` list."""
    HTMLParser.__init__(self)

    # Starts empty.
    self.links = []
def __init__(self):
    """Initialise the HTMLParser base; no additional state is created."""
    # Called explicitly on the base class rather than through super().
    HTMLParser.__init__(self)
def __init__(self):
    """Initialise the base parser with ``ppage`` unset."""
    HTMLParser.__init__(self)

    # Starts as None.
    self.ppage = None
def __init__(self):
    """Initialise the base parser with an empty ``result`` list."""
    HTMLParser.__init__(self)

    # Starts empty.
    self.result = []