Python HTMLParser.__init__ 예제들, six.moves.html_parser.HTMLParser.__init__ Python 예제들

예제 #1

0

파일 보기

    def __init__(self, url):
        """Initialise the parser with a normalised base URL and no links.

        :param url: base URL string. A trailing ``'/'`` is appended when it
                    is missing. An empty string becomes ``'/'`` rather than
                    raising ``IndexError``.
        """
        HTMLParser.__init__(self)

        # endswith() is safe on an empty string, unlike indexing url[-1].
        if not url.endswith('/'):
            url += '/'
        self.__url = url
        self.links = set()

예제 #2

0

파일 보기

    def __init__(self, *args, **kwargs):
        """Initialise the HTMLParser base, then continue up the class chain.

        When ``sys.version_info > (3,4)`` the base parser is created with
        ``convert_charrefs=False``; otherwise it is created with no
        arguments. ``*args`` and ``**kwargs`` are forwarded unchanged to
        ``super(HTMLRewriter, self).__init__``.
        """
        parser_kwargs = {}
        if sys.version_info > (3,4):  #pragma: no cover
            parser_kwargs['convert_charrefs'] = False
        HTMLParser.__init__(self, **parser_kwargs)

        super(HTMLRewriter, self).__init__(*args, **kwargs)

예제 #3

0

파일 보기

    def __init__(self):
        """Initialise the base parser and start with empty output.

        When ``is_py3()`` is true the base class is created with
        ``convert_charrefs=True``; otherwise it is created with no
        arguments.
        """
        parser_kwargs = {'convert_charrefs': True} if is_py3() else {}
        HTMLParser.__init__(self, **parser_kwargs)

        self._output = ''

예제 #4

0

파일 보기

파일: toc.py 프로젝트: AlexPerrot/mkdocs

    def __init__(self):
        """Initialise the base parser and reset all collection state."""
        HTMLParser.__init__(self)

        # Per-anchor state; presumably describes the anchor being read.
        self.title = ''
        self.attrs = None
        self.in_anchor = False

        # Results gathered so far.
        self.links = []

예제 #5

0

파일 보기

파일: html_rewriter.py 프로젝트: daleathan/pywb

    def __init__(self, *args, **kwargs):
        """Set up the HTMLParser base and then the remaining base classes.

        The base parser receives ``convert_charrefs=False`` only when
        ``sys.version_info > (3,4)``. Every positional and keyword argument
        is passed on to ``super(HTMLRewriter, self).__init__``.
        """
        takes_charrefs_flag = sys.version_info > (3,4)
        if not takes_charrefs_flag:  #pragma: no cover
            HTMLParser.__init__(self)
        else:  #pragma: no cover
            HTMLParser.__init__(self, convert_charrefs=False)

        super(HTMLRewriter, self).__init__(*args, **kwargs)

예제 #6

0

파일 보기

 def __init__(self, encoding='iso8859-1'):
     """Initialise the base parser and the instance's bookkeeping fields.

     :param encoding: stored as ``self.encoding``; defaults to
                      ``'iso8859-1'``.
     """
     HTMLParser.__init__(self)
     self.encoding = encoding

     # Integer flags start at zero.
     self.inbody = 0
     self.checkflag = 0  # Are we in a tag we check?

     # Containers start empty.
     self.__data = []
     self.tagstack = []

예제 #7

0

파일 보기

    def __init__(self, url, session=None, authentication=None, timeout=None):
        """Fetch *url* and feed the response text to this parser.

        :param url: url of the directory on the web server.
        :param session: a requests Session instance used to fetch the
                        directory content. If falsy, a new session is
                        created.
        :param authentication: a tuple (username, password) to authenticate
                               against the web server, or None for no
                               authentication. Only applied to a session
                               created here, never to a given *session*.
        :param timeout: timeout in seconds used when fetching the directory
                        content.
        """
        self.timeout = timeout
        if session:
            self.session = session
        else:
            self.session = requests.Session()
            self.session.auth = authentication

        self.entries = []
        self.active_url = None

        HTMLParser.__init__(self)

        # Force the server to not send cached content
        response = self.session.get(
            url,
            headers={'Cache-Control': 'max-age=0'},
            timeout=self.timeout,
        )

        # The response is closed whether or not fetching/parsing succeeds.
        try:
            response.raise_for_status()
            self.feed(response.text)
        finally:
            response.close()

예제 #8

0

파일 보기

    def __init__(self, styled, styles=None):
        """Initialise the base parser with empty output and a style table.

        :param styled: stored as ``self.styled``.
        :param styles: style table to use; any falsy value selects
                       ``default_styles`` instead.
        """
        HTMLParser.__init__(self)

        self.styled = styled
        self.styles = styles or default_styles

        self.s = ''
        self.style_stack = []

예제 #9

0

파일 보기

파일: styling.py 프로젝트: schwa/punic

    def __init__(self, style, styles = None):
        """Initialise the base parser, the output string and the style table.

        :param style: stored as ``self.style``.
        :param styles: style table to use; when falsy, ``default_styles``
                       is used.
        """
        HTMLParser.__init__(self)

        self.s = ''
        self.style_stack = []
        self.style = style

        if styles:
            self.styles = styles
        else:
            self.styles = default_styles

예제 #10

0

파일 보기

 def __init__(self, allows=None):
     """Initialise the base parser and the empty working lists.

     :param allows: tags to allow. Any falsy value (``None``, empty
                    list) keeps the ``allow_tags`` value already
                    reachable through ``self``.
     """
     HTMLParser.__init__(self)
     # ``None`` is falsy, so one ``or`` covers both "missing" and "empty".
     self.allow_tags = allows or self.allow_tags
     self.data = []
     self.start = []
     self.result = []

예제 #11

0

파일 보기

파일: utils.py 프로젝트: joetboole/pelican

    def __init__(self, max_words):
        """Initialise the base parser and the word-counting state.

        :param max_words: stored as ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        HTMLParser.__init__(self)

        self.truncate_at = None
        self.open_tags = []
        self.words_found = 0
        self.max_words = max_words

예제 #12

0

파일 보기

파일: webarticle2text.py 프로젝트: rmoorman/webarticle2text

 def __init__(self):
     """Initialise the base parser and all tracking fields."""
     HTMLParser.__init__(self)

     # Ignore tracking.
     self._ignore = False
     self._ignorePath = None

     # Position tracking.
     self._depth = 0
     self._lasttag = None

     # Collected text and counters.
     self.depthText = {}  # path:text
     self.lastN = 0
     self.counting = 0

예제 #13

0

파일 보기

파일: check_grafana_uids.py 프로젝트: ekaynar/ceph-master

 def __init__(self, _file, search_tag):
     """Initialise the base parser and store the search parameters.

     :param _file: stored as ``self.file``.
     :param search_tag: stored as ``self.search_tag``.
     """
     if not six.PY3:
         # HTMLParser is not a new-style class in py2
         HTMLParser.__init__(self)
     else:
         super(TemplateParser, self).__init__()
     self.file = _file
     self.search_tag = search_tag
     self.parsed_data = []

예제 #14

0

파일 보기

    def __init__(self):
        """Initialise the base parser with no values and an empty buffer."""
        HTMLParser.__init__(self)

        # Nothing has been read yet.
        self.in_tag = False
        self.read_buffer = six.StringIO()

        # All three start unset.
        self.new_value = None
        self.original_value = None
        self.text_name = None

예제 #15

0

파일 보기

파일: allPythonContent.py 프로젝트: Mondego/pyreco

 def __init__(self):
     """Set up the base parser and give every tracking field its start value."""
     HTMLParser.__init__(self)

     # Numeric counters.
     self.lastN = 0
     self.counting = 0
     self._depth = 0

     # Flags and markers that start unset.
     self._lasttag = None
     self._ignorePath = None
     self._ignore = False

     self.depthText = {} # path:text

예제 #16

0

파일 보기

파일: from_html.py 프로젝트: cecedille1/PDF_generator

    def __init__(self, media_locator, link_handler):
        """Initialise the base parser, the three rule sets and the stack.

        :param media_locator: passed to ``StartRules`` and ``StartEndRules``.
        :param link_handler: passed to ``StartRules`` and ``StartEndRules``.
        """
        HTMLParser.__init__(self)

        rule_args = (media_locator, link_handler)
        self.handlers_start = StartRules(*rule_args)
        self.handlers_startend = StartEndRules(*rule_args)
        self.handlers_end = EndRules()

        self.new_buffer()
        # The stack starts with a single empty list on it.
        self.stack = deque([[]])

예제 #17

0

파일 보기

파일: 0003_migrate_logs_to_new_fields.py 프로젝트: Nitrate/Nitrate

    def __init__(self):
        """Set up the base parser; all parsed values start unset."""
        HTMLParser.__init__(self)

        # Parsed values, all unset until parsing fills them in.
        self.text_name = None
        self.new_value = None
        self.original_value = None

        # Reading state.
        self.read_buffer = six.StringIO()
        self.in_tag = False

예제 #18

0

파일 보기

파일: utils.py 프로젝트: leduycuong86/pelican

    def __init__(self, max_words):
        """Set up the base parser and zero the truncation state.

        :param max_words: stored as ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        HTMLParser.__init__(self)

        self.max_words = max_words

        # Nothing counted or opened yet, and no truncation point found.
        self.open_tags = []
        self.truncate_at = None
        self.words_found = 0

예제 #19

0

파일 보기

    def __init__(self, styled):
        """Initialise the base parser, the output string and the style table.

        :param styled: stored as ``self.styled``.
        """
        HTMLParser.__init__(self)

        self.s = ''
        self.styled = styled

        term = MyHTMLParser.term
        self.styles = {
            'err': term.red,
            'ref': term.yellow,
            'rev': term.bold,
            # The underline part is looked up through ``self.term``.
            'cmd': term.cyan + self.term.underline,
            # 'sub': term.cyan,
            'echo': term.yellow,
        }

        self.style_stack = []

예제 #20

0

파일 보기

파일: html_parser.py 프로젝트: usamnet000/4us

	def __init__(self, skip_tags=None, debugger=None):
		"""Initialise the instance state, then the base parser.

		:param skip_tags: stored as ``self._skip_tags``. ``None`` (the
			default) gives the instance its own new empty list, so
			instances never share a single default list object.
		:param debugger: debugger object to use; when ``None`` an
			``HtmlParserDebugger(debug=False)`` is created.
		"""
		self._root = None
		self._stack = []
		# A literal ``[]`` default is one list shared by every instance
		# built without ``skip_tags``; create a new list per instance.
		self._skip_tags = [] if skip_tags is None else skip_tags
		self._skip = False, None
		self._hpd = debugger if debugger is not None else HtmlParserDebugger(debug=False)

		if is_py3():
			HTMLParser.__init__(self, convert_charrefs=True)
		else:
			HTMLParser.__init__(self)

예제 #21

0

파일 보기

파일: utils.py 프로젝트: cltrudeau/wrench

    def __init__(self, *args, **kwargs):
        """Initialise the base parser and clear the capture state.

        ``*args`` and ``**kwargs`` are forwarded to the base initialiser,
        via ``super()`` when ``sys.version_info > (3, )`` and by a direct
        ``HTMLParser.__init__`` call otherwise.
        """
        if sys.version_info <= (3, ):  # pragma: no cover
            # HTMLParser is still an old style object and so super doesn't
            # work
            HTMLParser.__init__(self, *args, **kwargs)
        else:
            super(AnchorParser, self).__init__(*args, **kwargs)

        self.text = ''
        self.url = ''
        self.capture = 0

예제 #22

0

파일 보기

파일: utils.py 프로젝트: cltrudeau/wrench

    def __init__(self, *args, **kwargs):
        """Run the base initialiser with the given arguments, then reset state.

        ``super()`` is used when ``sys.version_info > (3,)``; otherwise
        ``HTMLParser.__init__`` is called directly.
        """
        running_py3 = sys.version_info > (3,)
        if running_py3:
            super(AnchorParser, self).__init__(*args, **kwargs)
        else:   # pragma: no cover
            # HTMLParser is still an old style object and so super doesn't
            # work
            HTMLParser.__init__(self, *args, **kwargs)

        # Start with nothing captured.
        self.capture = 0
        self.text = ''
        self.url = ''

예제 #23

0

파일 보기

파일: htmlreader.py 프로젝트: userzimmermann/robotframework-python3

 def __init__(self):
     """Initialise the base parser, the encoding and the handler table.

     ``self._handlers`` maps ``'<tag>_start'`` / ``'<tag>_end'`` keys to
     bound methods. The ``th`` keys reuse the ``td`` methods.
     """
     HTMLParser.__init__(self)
     self._encoding = 'ISO-8859-1'
     self._handlers = dict(
         table_start=self.table_start,
         table_end=self.table_end,
         tr_start=self.tr_start,
         tr_end=self.tr_end,
         td_start=self.td_start,
         td_end=self.td_end,
         th_start=self.td_start,
         th_end=self.td_end,
         br_start=self.br_start,
         meta_start=self.meta_start,
     )

예제 #24

0

파일 보기

파일: htmlparser.py 프로젝트: CPoirot3/scrapy

    def __init__(self, tag="a", attr="href", process=None, unique=False):
        """Initialise the base parser, warn about deprecation, store matchers.

        :param tag: a callable is used as ``self.scan_tag`` as-is; any other
                    value is wrapped in a predicate testing ``== tag``.
        :param attr: same treatment as *tag*, stored as ``self.scan_attr``.
        :param process: a callable is used as ``self.process_attr``;
                        anything else (including ``None``) becomes an
                        identity function.
        :param unique: stored as ``self.unique``.
        """
        HTMLParser.__init__(self)

        deprecation_message = (
            "HtmlParserLinkExtractor is deprecated and will be removed in "
            "future releases. Please use scrapy.linkextractors.LinkExtractor"
        )
        warnings.warn(deprecation_message, ScrapyDeprecationWarning, stacklevel=2)

        if callable(tag):
            self.scan_tag = tag
        else:
            self.scan_tag = lambda t: t == tag

        if callable(attr):
            self.scan_attr = attr
        else:
            self.scan_attr = lambda a: a == attr

        if callable(process):
            self.process_attr = process
        else:
            self.process_attr = lambda v: v

        self.unique = unique

예제 #25

0

파일 보기

    def __init__(self, max_words):
        """Initialise the base parser and the word-counting state.

        The base parser is created with ``convert_charrefs=False`` when it
        accepts that keyword, and without arguments otherwise.

        :param max_words: stored as ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        try:
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # pre Python 3.3
            HTMLParser.__init__(self)

        self.truncate_at = None
        self.last_word_end = None
        self.open_tags = []
        self.words_found = 0
        self.max_words = max_words

예제 #26

0

파일 보기

파일: htmlparser.py 프로젝트: 306235911/IpPool

    def __init__(self, tag="a", attr="href", process=None, unique=False):
        """Set up the base parser, issue the deprecation warning, keep options.

        :param tag: value or callable; a non-callable is turned into an
                    equality predicate and stored as ``self.scan_tag``.
        :param attr: value or callable; handled like *tag* and stored as
                     ``self.scan_attr``.
        :param process: callable stored as ``self.process_attr``; a
                        non-callable is replaced by an identity function.
        :param unique: stored as ``self.unique``.
        """
        HTMLParser.__init__(self)

        warnings.warn(
            "HtmlParserLinkExtractor is deprecated and will be removed in "
            "future releases. Please use scrapy.linkextractors.LinkExtractor",
            category=ScrapyDeprecationWarning,
            stacklevel=2,
        )

        self.unique = unique
        self.scan_tag = (lambda t: t == tag) if not callable(tag) else tag
        self.scan_attr = (lambda a: a == attr) if not callable(attr) else attr
        self.process_attr = (lambda v: v) if not callable(process) else process

예제 #27

0

파일 보기

파일: utils.py 프로젝트: 52M/pelican

    def __init__(self, max_words):
        """Set up the base parser and clear all truncation bookkeeping.

        ``convert_charrefs=False`` is tried first; a ``TypeError`` from the
        base initialiser triggers a retry without arguments.

        :param max_words: stored as ``self.max_words``.
        """
        # In Python 2, HTMLParser is not a new-style class,
        # hence super() cannot be used.
        try:
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # pre Python 3.3
            HTMLParser.__init__(self)

        self.max_words = max_words

        # Running state.
        self.open_tags = []
        self.words_found = 0

        # Positions not known yet.
        self.truncate_at = None
        self.last_word_end = None

예제 #28

0

파일 보기

 def __init__(self):
     """Set up the base parser, the encoding and the handler lookup table."""
     HTMLParser.__init__(self)
     self._encoding = 'ISO-8859-1'

     handlers = {
         # Start handlers.
         'table_start': self.table_start,
         'tr_start': self.tr_start,
         'td_start': self.td_start,
         'br_start': self.br_start,
         'meta_start': self.meta_start,
         # End handlers.
         'table_end': self.table_end,
         'tr_end': self.tr_end,
         'td_end': self.td_end,
         # Header cells share the data-cell handlers.
         'th_start': self.td_start,
         'th_end': self.td_end,
     }
     self._handlers = handlers

예제 #29

0

파일 보기

파일: styling.py 프로젝트: Photonomie/punic

    def __init__(self, styled):
        """Set up the base parser, the output string and the style table.

        :param styled: stored as ``self.styled``.
        """
        HTMLParser.__init__(self)

        self.s = ''
        self.styled = styled

        # Built entry by entry, in the table's key order.
        styles = {}
        styles['err'] = MyHTMLParser.term.red
        styles['ref'] = MyHTMLParser.term.yellow
        styles['rev'] = MyHTMLParser.term.bold
        styles['cmd'] = MyHTMLParser.term.cyan + self.term.underline
        # 'sub': term.cyan,
        styles['echo'] = MyHTMLParser.term.yellow
        self.styles = styles

        self.style_stack = []

예제 #30

0

파일 보기

        def __init__(self, settings, filename):
            """Initialise the base parser and the document-reading state.

            :param settings: stored as ``self.settings``.
            :param filename: stored as ``self._filename``.
            """
            try:
                # Python 3.4+
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                HTMLParser.__init__(self)

            self.settings = settings
            self._filename = filename

            # Accumulated output starts empty.
            self.body = ''
            self.metadata = {}
            self._data_buffer = ''

            # Position flags: only the top level is active at the start.
            self._in_top_level = True
            self._in_head = False
            self._in_title = False
            self._in_body = False
            self._in_tags = False

예제 #31

0

파일 보기

파일: readers.py 프로젝트: andrew-vant/pelican

        def __init__(self, settings, filename):
            """Set up the base parser and clear everything collected so far.

            ``convert_charrefs=False`` is passed when the base initialiser
            accepts it; on ``TypeError`` the plain call is used.

            :param settings: stored as ``self.settings``.
            :param filename: stored as ``self._filename``.
            """
            try:
                # Python 3.4+
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                HTMLParser.__init__(self)

            # Flags that start cleared.
            self._in_tags = False
            self._in_body = False
            self._in_title = False
            self._in_head = False
            # The single flag that starts set.
            self._in_top_level = True

            self._filename = filename
            self._data_buffer = ''

            self.settings = settings
            self.metadata = {}
            self.body = ''

예제 #32

0

파일 보기

파일: typogrify.py 프로젝트: barrysteyn/pelican-typogrify

 def __init__(self, typogrify, html_doc):
     """Index the line starts of *html_doc*, then parse it immediately.

     :param typogrify: stored as ``self.typogrify``.
     :param html_doc: HTML text; stripped of surrounding whitespace
                      before it is indexed and fed to the parser.
     """
     self.html_doc = html_doc.strip()
     try:
         # Python 3.4+
         HTMLParser.__init__(self, convert_charrefs=False)
     except TypeError:
         HTMLParser.__init__(self)

     # Map 1-based line number -> offset of that line's first character
     # in self.html_doc. Created per instance so that positions from a
     # previously parsed document can never leak into this one.
     #
     # ACTUALLY - we should use StringIO here instead
     self.new_line_pos = {1: 0}
     line_number = 1
     for index, char in enumerate(self.html_doc):
         if char == "\n":
             line_number += 1
             # The next line starts one character past the newline.
             self.new_line_pos[line_number] = index + 1

     self.typogrify = typogrify
     self.feed(self.html_doc)  # start parsing

예제 #33

0

파일 보기

    def __init__(self, search_anchor):
        """Initialise the base parser and remember the anchor to look for.

        :param search_anchor: stored as ``self.search_anchor``.
        """
        HTMLParser.__init__(self)

        # Nothing has been found yet.
        self.found = False
        self.search_anchor = search_anchor

예제 #34

0

파일 보기

파일: metadata.py 프로젝트: bhooshan-gadre/pycbc

 def __init__(self):
     """Create the empty ``metadata`` dict, then initialise the base parser."""
     # NOTE(review): metadata is assigned before the base initialiser runs,
     # presumably so it already exists if that initialiser calls back into
     # this class -- confirm before reordering.
     self.metadata = {}
     HTMLParser.__init__(self)

예제 #35

0

파일 보기

파일: linkcheck.py 프로젝트: BlackYoup/sphinx

    def __init__(self, search_anchor):
        """Set up the base parser with the target anchor and a cleared flag.

        :param search_anchor: stored as ``self.search_anchor``.
        """
        HTMLParser.__init__(self)

        self.search_anchor, self.found = search_anchor, False

예제 #36

0

파일 보기

    def __init__(self, search_anchor):
        # type: (unicode) -> None
        """Initialise the base parser; ``found`` starts out false.

        :param search_anchor: stored as ``self.search_anchor``.
        """
        HTMLParser.__init__(self)

        self.found = False
        self.search_anchor = search_anchor

예제 #37

0

파일 보기

파일: sofa_deriver.py 프로젝트: paulyc/erfa-fetch

 def __init__(self):
     """Create the empty ``matched_urls`` list, then initialise the base parser."""
     # NOTE(review): the list is created before the base initialiser runs;
     # keep this order unless it is confirmed nothing depends on it.
     self.matched_urls = []
     HTMLParser.__init__(self)

예제 #38

0

파일 보기

파일: anyurl_plugin.py 프로젝트: jkent/jkent-pybot

 def __init__(self):
     """Initialise the base parser with an empty title and no match."""
     HTMLParser.__init__(self)
     self.title = ''
     self.match = False

예제 #39

0

파일 보기

 def __init__(self, pattern):
     """Initialise the base parser with a pattern and an empty item list.

     :param pattern: stored as ``self.pattern``.
     """
     HTMLParser.__init__(self)
     self.pattern = pattern
     self.items = []

예제 #40

0

파일 보기

 def __init__(self):
     """Initialise the base parser, reset it, and start with an empty ``fed`` list."""
     HTMLParser.__init__(self)
     # Explicit reset() after the base initialiser; ``fed`` is created
     # only after it, so reset() itself cannot rely on ``fed`` here.
     self.reset()
     self.fed = []

예제 #41

0

파일 보기

파일: linkcheck.py 프로젝트: Felix-neko/sphinx

    def __init__(self, search_anchor):
        # type: (unicode) -> None
        """Set up the base parser and record which anchor is wanted.

        :param search_anchor: stored as ``self.search_anchor``.
        """
        HTMLParser.__init__(self)

        self.search_anchor, self.found = search_anchor, False

예제 #42

0

파일 보기

 def __init__(self, trans, render_embed_html_fn):
     """Initialise the base parser and the ignore-tracking state.

     :param trans: stored as ``self.trans``.
     :param render_embed_html_fn: stored as ``self.render_embed_html_fn``.
     """
     HTMLParser.__init__(self)

     # Collaborators supplied by the caller.
     self.trans = trans
     self.render_embed_html_fn = render_embed_html_fn

     # Nothing is being ignored at the start.
     self.num_open_tags_for_ignore = 0
     self.ignore_content = False

예제 #43

0

파일 보기

파일: benchmark.py 프로젝트: bluedynamics/repoze.catalog

 def __init__(self, url, out_dir):
     """Initialise the base parser and store the two arguments.

     :param url: stored as ``self.url``.
     :param out_dir: stored as ``self.out_dir``.
     """
     HTMLParser.__init__(self)
     self.out_dir = out_dir
     self.url = url

예제 #44

0

파일 보기

 def __init__(self):
     """Start with an empty ``metadata`` dict and initialise the base parser."""
     # NOTE(review): ``metadata`` is set first, ahead of the base
     # initialiser -- verify nothing relies on that before changing it.
     self.metadata = {}
     HTMLParser.__init__(self)

예제 #45

0

파일 보기

파일: benchmark.py 프로젝트: repoze/repoze.catalog

 def __init__(self, url, out_dir):
     """Set up the base parser and keep *url* and *out_dir* on the instance.

     :param url: stored as ``self.url``.
     :param out_dir: stored as ``self.out_dir``.
     """
     HTMLParser.__init__(self)
     self.url, self.out_dir = url, out_dir

예제 #46

0

파일 보기

파일: bow.py 프로젝트: Tarmasterellis/bagofwords

 def __init__(self):
     """Initialise the base parser and start with an empty ``text`` list."""
     HTMLParser.__init__(self)
     self.text = []

예제 #47

0

파일 보기

파일: workflow.py 프로젝트: lappsgrid-incubator/Galaxy

 def __init__(self, target_tag):
     """Initialise the base parser and the tag-tracking state.

     :param target_tag: stored as ``self.target_tag``.
     """
     # Cannot use super() because HTMLParser is an old-style class in Python2
     HTMLParser.__init__(self)
     self.tag_content = ""
     self.cur_tag = None
     self.target_tag = target_tag

예제 #48

0

파일 보기

파일: license_client.py 프로젝트: adfinis-forks/univention-corporate-server

 def __init__(self, log):
     # type: (logging.Logger) -> None
     """Initialise the base parser; no license link is known yet.

     :param log: stored as ``self.log``.
     """
     HTMLParser.__init__(self)  # old style class
     self.link_to_license = None  # type: Optional[str]
     self.log = log

예제 #49

0

파일 보기

 def __init__(self):
     """Initialise the base parser and start with an empty ``links`` list."""
     HTMLParser.__init__(self)
     self.links = []

예제 #50

0

파일 보기

파일: page.py 프로젝트: ImmPortDB/immport-galaxy

 def __init__(self, trans, render_embed_html_fn):
     """Set up the base parser; content is not being ignored initially.

     :param trans: stored as ``self.trans``.
     :param render_embed_html_fn: stored as ``self.render_embed_html_fn``.
     """
     HTMLParser.__init__(self)
     self.ignore_content, self.num_open_tags_for_ignore = False, 0
     self.trans, self.render_embed_html_fn = trans, render_embed_html_fn

예제 #51

0

파일 보기

파일: html.py 프로젝트: zyh329/robotframework-tools

 def __init__(self):
     """Initialise the base parser; no extra state is set up here."""
     HTMLParser.__init__(self)

예제 #52

0

파일 보기

파일: parselypage.py 프로젝트: Parsely/schemato

 def __init__(self):
     """Initialise the base parser; ``ppage`` starts as ``None``."""
     HTMLParser.__init__(self)
     self.ppage = None

예제 #53

0

파일 보기

 def __init__(self, target_tag):
     """Set up the base parser with no current tag and empty content.

     :param target_tag: stored as ``self.target_tag``.
     """
     # Cannot use super() because HTMLParser is an old-style class in Python2
     HTMLParser.__init__(self)
     self.target_tag = target_tag
     self.cur_tag, self.tag_content = None, ""

예제 #54

0

파일 보기

파일: vmimage.py 프로젝트: harish-24/avocado

 def __init__(self, pattern):
     """Set up the base parser; ``items`` is empty until parsing fills it.

     :param pattern: stored as ``self.pattern``.
     """
     HTMLParser.__init__(self)
     self.items, self.pattern = [], pattern

예제 #55

0

파일 보기

파일: html.py 프로젝트: miki725/django-auxilium

 def __init__(self):
     """Initialise the base parser and start with an empty ``result`` list."""
     HTMLParser.__init__(self)
     self.result = []