Exemple #1
0
    def _setup_entity_definitions(self):
        for key, value in entitydefs.items():
            value = value
            self._entitydefs_inverted[value] = key

        self._badchars_regex = re.compile('|'.join(entitydefs.values()))
        self._been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')
Exemple #2
0
def remove_html_entities(text):
    """Replace named HTML entities in *text* with their characters.

    Note: when used together with ``remove_html_markup``, call this
    function after that one has run.

    All ``&name;`` references are resolved in a single pass, so text
    produced by one replacement is never decoded a second time
    (``&amp;lt;`` becomes ``&lt;``, not ``<``).  References whose name is
    not in ``entitydefs`` are left untouched.
    """
    import re

    def _decode(match):
        # Fall back to the original text for unknown entity names.
        return entitydefs.get(match.group(1), match.group(0))

    return re.sub(r'&([A-Za-z0-9]+);', _decode, text)
def fix_special_chars(html_text):
    """Rewrite characters that have a named entity as ``&name;``.

    Every value in ``entitydefs`` found in *html_text* is replaced by its
    entity reference, except for the entities ``amp``, ``frasl``, ``gt``,
    ``lt`` and ``quot``, whose characters are left as they are.  Returns
    the converted string.
    """
    untouched = ('amp', 'frasl', 'gt', 'lt', 'quot')
    for entity_name, character in entitydefs.items():
        if entity_name in untouched:
            continue
        html_text = html_text.replace(character, "&" + entity_name + ";")
    # NOTE(review): as written this maps the character to itself, so it has
    # no effect; the intended replacement text may have been lost - confirm.
    return html_text.replace('∙', "∙")
Exemple #4
0
def remove_html_entities(text):
    """Replace named HTML entities in *text* with their characters.

    Note: when used together with ``remove_html_markup``, call this
    function after that one has run.

    All ``&name;`` references are resolved in a single pass, so text
    produced by one replacement is never decoded a second time
    (``&amp;lt;`` becomes ``&lt;``, not ``<``).  References whose name is
    not in ``entitydefs`` are left untouched.
    """
    import re

    def _decode(match):
        # Fall back to the original text for unknown entity names.
        return entitydefs.get(match.group(1), match.group(0))

    return re.sub(r'&([A-Za-z0-9]+);', _decode, text)
def description(item):
    """Return the cleaned-up description of *item* as a list of lines.

    The item name is mapped to an oid through ``name_to_oid`` (0 when the
    name is unknown) and the text is fetched from ``oid_to_desc``.  Named
    entities are expanded, carriage returns removed, every
    ``(pattern, replacement)`` pair in ``regices`` applied, and the result
    stripped and split on newlines.  An empty list is returned when there
    is no (or an empty) description.
    """
    text = oid_to_desc.get(name_to_oid.get(item, 0))
    if not text:
        return []

    for entity_name, replacement in entitydefs.items():
        text = text.replace("&%s;" % entity_name, replacement)
    text = text.replace("\r", "")
    for pattern, substitute in regices:
        text = pattern.sub(substitute, text)
    return text.strip().split("\n")
Exemple #6
0
def description(item):
    """Return the cleaned-up description of *item* as a list of lines.

    The item name is mapped to an oid through ``name_to_oid`` (0 when the
    name is unknown) and the text is fetched from ``oid_to_desc``.  Named
    entities are expanded, carriage returns removed, every
    ``(pattern, replacement)`` pair in ``regices`` applied, and the result
    stripped and split on newlines.  An empty list is returned when there
    is no (or an empty) description.
    """
    text = oid_to_desc.get(name_to_oid.get(item, 0))
    if not text:
        return []

    for entity_name, replacement in entitydefs.items():
        text = text.replace("&%s;" % entity_name, replacement)
    text = text.replace("\r", "")
    for pattern, substitute in regices:
        text = pattern.sub(substitute, text)
    return text.strip().split("\n")
def stringhtml(chaine):
    """Escape HTML-special and Latin-1 high characters in *chaine*.

    Runs of ``&``, ``<``, ``>``, ``"`` and characters in the range
    U+0080-U+00FF are replaced character by character: with the named
    entity (``&eacute;``) when ``entitydefs`` has one, otherwise with a
    decimal numeric character reference (``&#128;``).  All other
    characters are returned unchanged.
    """
    # Default for every code point below 256: decimal numeric reference.
    # The '#' is required; "&128;" is not a valid character reference.
    emap = {chr(i): "&#%d;" % i for i in range(256)}

    # Prefer the named entity wherever one exists.
    for entity, char in entitydefs.items():
        if char in emap:
            emap[char] = "&%s;" % entity

    def remplace(m):
        # The pattern only matches characters that are keys of emap.
        return "".join(emap[c] for c in m.group())

    return re.sub(r'[&<>\"\x80-\xff]+', remplace, chaine)
Exemple #8
0
def htmlentitydecode(string_cp1252: bytes) -> str:
    """
    Decode CP1252 bytes and resolve some character references.

    After decoding, every ``entitydefs`` value that starts with ``&`` is
    replaced by the character for that entity name, and every decimal
    reference ``&#N;`` whose code point N is a key of ``codepoint2name``
    is replaced by ``chr(N)``.  If the input cannot be decoded as CP1252
    it is returned unchanged.
    """
    try:
        decoded = string_cp1252.decode('cp1252')
    except UnicodeDecodeError:
        # NOTE(review): this hands back the original (bytes) argument
        # although the signature promises str - confirm callers expect it.
        return string_cp1252

    # NOTE(review): with the Python 3 ``html.entities`` table the only value
    # starting with '&' appears to be '&' itself, making this pass a no-op;
    # verify whether named entities were meant to be decoded here.
    for name, definition in entitydefs.items():
        if not definition.startswith('&'):
            continue
        decoded = decoded.replace(definition, chr(name2codepoint[name]))

    for codepoint in codepoint2name:
        decoded = decoded.replace(f'&#{codepoint};', chr(codepoint))
    return decoded
Exemple #9
0
def html_escape(text):
    """
    Return @text with characters replaced by their named HTML entities.
    """
    # '&' has to go first: the entity references inserted below contain
    # ampersands of their own, which must not be escaped again.
    text = text.replace("&", "&amp;")

    # Entity values are decoded as iso-8859-1 when the text is bytes, to
    # avoid UnicodeDecodeError in text.replace().
    needs_decoding = isinstance(text, bytes)

    for name, char in entitydefs.items():
        if char == "&":
            # Already handled above; doing it again would mangle the
            # references produced so far.
            continue
        if needs_decoding:
            char = char.decode("iso-8859-1")
        text = text.replace(char, "&%s;" % name)
    return text
Exemple #10
0
# Override the 'nbsp' entity value.
entitydefs['nbsp'] = ' '

# Entities kept in their own table; they are excluded from entcharrefs.
sgmlentity = dict(lt='<', gt='>', amp='&', quot='"', apos='\'', ndash='-')
sgmlentityget = sgmlentity.get

# entcharrefs maps both the entity name ('eacute') and its decimal
# reference ('#233') to the replacement character.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _name, _char in entitydefs.items():
    if _name in sgmlentity:
        continue
    if _char.startswith('&#'):
        # The value is itself a numeric reference such as '&#8364;'.
        dec_code = _char[1:-1]
        _char = chr(int(dec_code[1:]))
    else:
        dec_code = '#%d' % ord(_char)
    entcharrefs[dec_code] = _char
    entcharrefs[_name] = _char
del _name, _char

# Decimal and hexadecimal spellings of code point 160.
entcharrefs.update({'#160': ' ', '#xA0': ' ', '#xa0': ' ', '#XA0': ' '})
Exemple #11
0
             (FAN_PIECES[BLACK][BISHOP], 'J'), (FAN_PIECES[BLACK][ROOK], 'L'),
             (FAN_PIECES[BLACK][QUEEN], 'M'), (FAN_PIECES[BLACK][KING], 'N'),
             (FAN_PIECES[WHITE][KNIGHT], 'k'),
             (FAN_PIECES[WHITE][BISHOP], 'j'), (FAN_PIECES[WHITE][ROOK], 'l'),
             (FAN_PIECES[WHITE][QUEEN], 'm'), (FAN_PIECES[WHITE][KING], 'n'),
             ('†', '+'), ('‡', '+'), ('1/2', 'Z'))


def fanconv(fan):
    """Apply each (search, replacement) pair of ``lisPieces`` to *fan*.

    The pairs are applied in order with ``str.replace`` and the converted
    string is returned.
    """
    converted = fan
    for needle, substitute in lisPieces:
        converted = converted.replace(needle, substitute)
    return converted

# Dictionaries and expressions for parsing diagrams
# Rebind entitydefs as "&name;" -> UTF-8 encoded character, keeping only
# entities whose value is a single character.
entitydefs = {
    "&%s;" % name: char.encode('utf-8')
    for name, char in entitydefs.items()
    if len(char) == 1
}
# Reverse table: UTF-8 encoded character -> "&name;".
def2entity = {encoded: reference for reference, encoded in entitydefs.items()}

# CSS text: declares the "Chess Alpha 2" web font (local copy first, then
# several formats hosted on pychess.org) and the rules for elements inside
# ``table.pychess``.
# NOTE(review): the ``table.pychess td.numa`` selector is declared twice; the
# second rule may have been meant for a different class - confirm.
style = """
    @font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"),
    url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"),
    url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"),
    url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"),
    url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";}
    table.pychess {display:inline-block; vertical-align:top}
    table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em}
    table.pychess td.numa {width:0; text-align:right}
    table.pychess td.numa {width:0; text-align:right; padding-left:1em}
    table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em}
    table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1}
"""
Exemple #12
0
        if size < 1024.0:
            import math

            if math.floor(size) == size:
                return "%d %s" % (int(size), final_unit)
            else:
                return "%3.1f %s" % (size, final_unit)

        if unit != "Yotta":
            size /= 1024.0

    return "%3.1f %s" % (size, final_unit)


# Reverse lookup: entity value -> entity name.
entitydefs_inverted = {v: k for k, v in entitydefs.items()}

# Matches any value that has a named entity.  Each alternative is escaped so
# that no value can be read as regular-expression syntax.
_badchars_regex = re.compile(
    "|".join(re.escape(v) for v in entitydefs.values()))
# Matches text that already looks like an entity reference (&name; or
# &#123;).  Raw string: "\w" in a plain literal is an invalid escape.
_been_fixed_regex = re.compile(r"&\w+;|&#[0-9]+;")


def html_entity_fixer(text, skipchars=[], extra_careful=1):
    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and _been_fixed_regex.findall(text):
        return text

    if type(skipchars) == type("s"):
        skipchars = [skipchars]
Exemple #13
0
# Handle HTML/XML/SGML entities.
try:
    from html.entities import entitydefs
except ImportError:
    from htmlentitydefs import entitydefs
# Work on a private copy so the standard-library table is not modified.
entitydefs = entitydefs.copy()
entitydefsget = entitydefs.get
entitydefs['nbsp'] = ' '

# Entities kept in their own table; they are excluded from entcharrefs.
sgmlentity = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\'', 'ndash': '-'}
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# entcharrefs maps both the entity name ('eacute') and its decimal
# reference ('#233') to the replacement character as text.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _k, _v in entitydefs.items():
    if _k in _sgmlentkeys:
        continue
    if _v[0:2] == '&#':
        # The value is itself a numeric reference such as '&#8364;'.
        dec_code = _v[1:-1]
        _v = _unichr(int(_v[2:-1]))
    else:
        dec_code = '#' + str(ord(_v))
        # Byte-string values (Python 2) are Latin-1 encoded; text values
        # (Python 3) need no conversion.
        if isinstance(_v, bytes):
            _v = _v.decode('latin_1', 'replace')
    entcharrefs[dec_code] = _v
    entcharrefs[_k] = _v
del _sgmlentkeys, _k, _v, _unichr
entcharrefs['#160'] = u' '
entcharrefs['#xA0'] = u' '
entcharrefs['#xa0'] = u' '
entcharrefs['#XA0'] = u' '
Exemple #14
0
             (FAN_PIECES[BLACK][ROOK], 'L'), (FAN_PIECES[BLACK][QUEEN], 'M'),
             (FAN_PIECES[BLACK][KING], 'N'), (FAN_PIECES[WHITE][KNIGHT], 'k'),
             (FAN_PIECES[WHITE][BISHOP], 'j'), (FAN_PIECES[WHITE][ROOK], 'l'),
             (FAN_PIECES[WHITE][QUEEN], 'm'), (FAN_PIECES[WHITE][KING], 'n'),
             ('†', '+'), ('‡', '+'), ('1/2', 'Z'))


def fanconv(fan):
    """Apply each (search, replacement) pair of ``lisPieces`` to *fan*.

    The pairs are applied in order with ``str.replace`` and the converted
    string is returned.
    """
    converted = fan
    for needle, substitute in lisPieces:
        converted = converted.replace(needle, substitute)
    return converted


# Dictionaries and expressions for parsing diagrams
# Rebind entitydefs as "&name;" -> UTF-8 encoded character, keeping only
# entities whose value is a single character.
entitydefs = {
    "&%s;" % name: char.encode('utf-8')
    for name, char in entitydefs.items()
    if len(char) == 1
}
# Reverse table: UTF-8 encoded character -> "&name;".
def2entity = {encoded: reference for reference, encoded in entitydefs.items()}

# CSS text: declares the "Chess Alpha 2" web font (local copy first, then
# several formats hosted on pychess.org) and the rules for elements inside
# ``table.pychess``.
# NOTE(review): the ``table.pychess td.numa`` selector is declared twice; the
# second rule may have been meant for a different class - confirm.
style = """
    @font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"),
    url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"),
    url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"),
    url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"),
    url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";}
    table.pychess {display:inline-block; vertical-align:top}
    table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em}
    table.pychess td.numa {width:0; text-align:right}
    table.pychess td.numa {width:0; text-align:right; padding-left:1em}
    table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em}
    table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1}
"""