Exemple #1
0
# -*- coding: utf-8 -*-
import re
import cgi

from talon.quotations import (register_xpath_extensions, extract_from_html,
                              extract_from_plain)  # noqa
# Module-level side effect: talon.quotations.register_xpath_extensions() is
# called as soon as this module is imported, before the remaining imports
# and definitions below are processed.
register_xpath_extensions()

from HTMLParser import HTMLParser


# http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
class MLStripper(HTMLParser):
    """HTML parser that tracks whether it is inside a "stripped" tag.

    While ``strip_tag_contents_mode`` is True the parser is positioned
    inside one of ``strippedTags``. ``fed`` starts out as an empty list;
    presumably a ``handle_data`` outside this excerpt appends text to it
    and consults the flag -- confirm against the full class.
    """

    # Tag names (lower-case) whose contents should be dropped.
    strippedTags = ["title", "script", "style"]

    def __init__(self):
        # Go through the base-class initializer rather than calling
        # reset() directly. On Python 2 this is equivalent (the base
        # __init__ only calls reset()), but it also picks up any extra
        # state the base class sets up on other versions. The explicit
        # HTMLParser.__init__ form is used because the Python 2
        # HTMLParser is an old-style class, so super() is unavailable.
        HTMLParser.__init__(self)
        self.fed = []
        self.strip_tag_contents_mode = False

    def handle_starttag(self, tag, attrs):
        """Enter strip mode when *tag* (case-insensitive) is in strippedTags.

        *attrs* is accepted to match the HTMLParser callback signature
        and is not used.
        """
        # Strip the contents of a tag when it's
        # in strippedTags. We can do this because
        # HTMLParser won't try to parse the inner
        # contents of a tag.
        if tag.lower() in MLStripper.strippedTags:
            self.strip_tag_contents_mode = True

    def handle_endtag(self, tag):
        """Leave strip mode on any end tag, whatever *tag* is."""
        # Deliberately unconditional: the flag is cleared by the first
        # end tag seen, not only by the tag that set it.
        self.strip_tag_contents_mode = False
Exemple #2
0
def init(path_to_models=None):
    """Run the one-time setup calls for this package.

    Always calls ``register_xpath_extensions()``. When ``ML_ENABLED`` is
    truthy, additionally calls ``signature.initialize(path_to_models)``.

    :param path_to_models: forwarded unchanged to ``signature.initialize``;
        defaults to None.
    :returns: None.
    """
    register_xpath_extensions()
    if not ML_ENABLED:
        # ML support is switched off: skip the signature setup entirely.
        return
    signature.initialize(path_to_models)
Exemple #3
0
# -*- coding: utf-8 -*-
import re
import cgi
from HTMLParser import HTMLParser, HTMLParseError
from talon.quotations import (register_xpath_extensions, extract_from_html,
                              extract_from_plain)  # noqa
# Module-level side effect: talon.quotations.register_xpath_extensions() is
# called as soon as this module is imported, before the remaining imports
# and definitions below are processed.
register_xpath_extensions()

from inbox.log import get_logger

# Names exported by `from <module> import *`. extract_from_html and
# extract_from_plain are re-exports of the talon.quotations imports above;
# strip_tags and plaintext2html are not visible in this excerpt
# (presumably defined further down in the module -- confirm).
__all__ = ['strip_tags', 'plaintext2html', 'extract_from_html',
           'extract_from_plain']


# http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
class MLStripper(HTMLParser):
    """HTML parser that tracks whether it is inside a "stripped" tag.

    ``strip_tag_contents_mode`` is set to True once a start tag listed in
    ``strippedTags`` is seen. ``fed`` starts out as an empty list;
    presumably other handlers outside this excerpt append text to it and
    reset the flag -- confirm against the full class.
    """

    # Tag names (lower-case) whose contents should be dropped.
    strippedTags = ["title", "script", "style"]

    def __init__(self):
        # Go through the base-class initializer rather than calling
        # reset() directly. On Python 2 this is equivalent (the base
        # __init__ only calls reset()), but it also picks up any extra
        # state the base class sets up on other versions. The explicit
        # HTMLParser.__init__ form is used because the Python 2
        # HTMLParser is an old-style class, so super() is unavailable.
        HTMLParser.__init__(self)
        self.fed = []
        self.strip_tag_contents_mode = False

    def handle_starttag(self, tag, attrs):
        """Enter strip mode when *tag* (case-insensitive) is in strippedTags.

        *attrs* is accepted to match the HTMLParser callback signature
        and is not used.
        """
        # Strip the contents of a tag when it's
        # in strippedTags. We can do this because
        # HTMLParser won't try to parse the inner
        # contents of a tag.
        if tag.lower() in MLStripper.strippedTags:
            self.strip_tag_contents_mode = True
Exemple #4
0
def init():
    """Run the package setup: call ``register_xpath_extensions()``.

    Takes no arguments and returns None.
    """
    register_xpath_extensions()
Exemple #5
0
def init():
    """Run the one-time setup calls for this package.

    Always calls ``register_xpath_extensions()``. When ``ML_ENABLED`` is
    truthy, additionally calls ``signature.initialize()``.

    :returns: None.
    """
    register_xpath_extensions()
    if not ML_ENABLED:
        # ML support is switched off: skip the signature setup entirely.
        return
    signature.initialize()
Exemple #6
0
def init():
    """Run the package setup calls, in order.

    Calls ``register_xpath_extensions()`` and then
    ``signature.initialize()`` unconditionally. Takes no arguments and
    returns None.
    """
    register_xpath_extensions()
    signature.initialize()
Exemple #7
0
def init():
    """Run the one-time setup calls for this package.

    Always calls ``register_xpath_extensions()``. When ``ML_ENABLED`` is
    truthy, additionally calls ``signature.initialize()``.

    :returns: None.
    """
    register_xpath_extensions()
    if not ML_ENABLED:
        # ML support is switched off: skip the signature setup entirely.
        return
    signature.initialize()
Exemple #8
0
def init():
    """Run the package setup calls, in order.

    Calls ``register_xpath_extensions()`` and then
    ``signature.initialize()`` unconditionally. Takes no arguments and
    returns None.
    """
    register_xpath_extensions()
    signature.initialize()
Exemple #9
0
def init():
    """Run the package setup: call ``register_xpath_extensions()``.

    Takes no arguments and returns None.
    """
    register_xpath_extensions()