Python get Examples, html.entities.name2codepoint.get Python Examples

Example #1

0

Show file

File: html.py Project: zanachka/w3lib

    def convert_entity(m):
        """Return the replacement text for one matched HTML entity.

        ``m`` is a regex match whose named groups ``dec``, ``hex``, ``named``
        and ``semicolon`` are inspected.  ``keep`` and ``remove_illegal`` are
        read from the enclosing scope.

        * Named entities whose lower-cased name is in ``keep`` are returned
          exactly as matched.
        * A reference that resolves to a code point is returned as that
          character.
        * Anything else is replaced by ``''`` when ``remove_illegal`` is true
          and the ``semicolon`` group matched; otherwise the matched text is
          returned unchanged.
        """
        groups = m.groupdict()
        # Initialise up front: when none of the three groups matched, the
        # check below would otherwise read an unbound local.
        number = None
        if groups.get('dec'):
            number = int(groups['dec'], 10)
        elif groups.get('hex'):
            number = int(groups['hex'], 16)
        elif groups.get('named'):
            entity_name = groups['named']
            if entity_name.lower() in keep:
                return m.group(0)
            # Try the exact spelling first, then the lower-cased one.
            number = (name2codepoint.get(entity_name)
                      or name2codepoint.get(entity_name.lower()))
        if number is not None:
            # Numeric character references in the 80-9F range are typically
            # interpreted by browsers as representing the characters mapped
            # to bytes 80-9F in the Windows-1252 encoding. For more info
            # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
            try:
                if 0x80 <= number <= 0x9f:
                    return bytes((number, )).decode('cp1252')
                return chr(number)
            except (ValueError, OverflowError):
                # ValueError: code point outside the Unicode range, or a byte
                # with no cp1252 mapping (UnicodeDecodeError is a ValueError).
                # OverflowError: number too large for chr() to accept at all.
                pass

        return '' if remove_illegal and groups.get('semicolon') else m.group(0)

Example #2

0

Show file

File: html.py Project: rrosajp/w3lib

    def convert_entity(m: Match) -> str:
        """Return the replacement text for one matched HTML entity.

        ``m`` must expose the named groups ``dec``, ``hex``, ``named`` and
        ``semicolon``.  ``keep`` and ``remove_illegal`` are read from the
        enclosing scope.

        Named entities whose lower-cased name is in ``keep`` are returned as
        matched.  References that resolve to a code point are returned as
        that character.  Everything else becomes ``""`` when
        ``remove_illegal`` is true and the ``semicolon`` group matched, and
        is otherwise returned unchanged.
        """
        groups = m.groupdict()
        number = None
        if groups.get("dec"):
            number = int(groups["dec"], 10)
        elif groups.get("hex"):
            number = int(groups["hex"], 16)
        elif groups.get("named"):
            entity_name = groups["named"]
            if entity_name.lower() in keep:
                return m.group(0)
            # Exact spelling first, lower-cased spelling as a fallback.
            number = name2codepoint.get(entity_name) or name2codepoint.get(
                entity_name.lower())
        if number is not None:
            # Numeric character references in the 80-9F range are typically
            # interpreted by browsers as representing the characters mapped
            # to bytes 80-9F in the Windows-1252 encoding. For more info
            # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
            try:
                if 0x80 <= number <= 0x9F:
                    return bytes((number, )).decode("cp1252")
                return chr(number)
            except (ValueError, OverflowError):
                # chr() raises ValueError just past the Unicode range but
                # OverflowError for numbers too large to convert at all;
                # both mean "not a usable character reference".
                pass

        return "" if remove_illegal and groups.get("semicolon") else m.group(0)

Example #3

0

Show file

    def convert_entity(m):
        """Return the replacement text for one matched HTML entity.

        Group 1 of ``m`` is truthy for numeric references, group 2 is truthy
        when the number is hexadecimal, and group 3 holds the digits or the
        entity name.  ``keep`` and ``remove_illegal`` are read from the
        enclosing scope.

        Names listed in ``keep`` are returned as matched.  References that
        cannot be converted are replaced by ``''`` when ``remove_illegal`` is
        true and are otherwise returned unchanged.
        """
        entity_body = m.group(3)
        if m.group(1):
            try:
                if m.group(2):
                    number = int(entity_body, 16)
                else:
                    number = int(entity_body, 10)
                # Numeric character references in the 80-9F range are typically
                # interpreted by browsers as representing the characters mapped
                # to bytes 80-9F in the Windows-1252 encoding. For more info
                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
                if 0x80 <= number <= 0x9f:
                    # chr() yields a str, which has no .decode(); build the
                    # single byte explicitly before decoding it.
                    return bytes((number,)).decode('cp1252')
            except ValueError:
                # Bad digits, or a byte that cp1252 leaves undefined
                # (UnicodeDecodeError is a ValueError).
                number = None
        else:
            if entity_body in keep:
                return m.group(0)
            number = name2codepoint.get(entity_body)
        if number is not None:
            try:
                return chr(number)
            except (ValueError, OverflowError):
                # Out of the Unicode range, or too large for chr() to accept.
                pass

        return '' if remove_illegal else m.group(0)

Example #4

0

Show file

File: text.py Project: timvieira/skid

    def convert_entity(m):
        """Convert one matched HTML entity reference into its character.

        The match groups are read as: 1 = numeric marker (truthy for numeric
        references), 2 = hexadecimal marker, 3 = digits or entity name.
        ``keep`` and ``remove_illegal`` come from the enclosing scope.

        Returns the decoded character; the matched text itself for names in
        ``keep``; and, for references that cannot be decoded, ``''`` if
        ``remove_illegal`` is true or the matched text otherwise.
        """
        entity_body = m.group(3)
        if m.group(1):
            try:
                radix = 16 if m.group(2) else 10
                number = int(entity_body, radix)
                # Numeric character references in the 80-9F range are typically
                # interpreted by browsers as representing the characters mapped
                # to bytes 80-9F in the Windows-1252 encoding. For more info
                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
                if 0x80 <= number <= 0x9f:
                    # A str cannot be .decode()d; decode a one-byte bytes
                    # object instead.
                    return bytes((number,)).decode('cp1252')
            except ValueError:
                # Covers unparsable digits and bytes undefined in cp1252.
                number = None
        elif entity_body in keep:
            return m.group(0)
        else:
            number = name2codepoint.get(entity_body)
        if number is not None:
            try:
                return chr(number)
            except (ValueError, OverflowError):
                # Not a valid code point, or too large for chr() altogether.
                pass

        return '' if remove_illegal else m.group(0)

Example #5

0

Show file

File: plugin.py Project: AlanBell/Supybot-plugins

 def substitute_entity(match):
     """Return the character for one matched entity reference.

     Group 1 equals ``"#"`` for numeric references, group 2 is empty for
     decimal numbers (anything else is treated as hexadecimal), and group 3
     holds the digits or the entity name.  Names without a code point in
     ``name2codepoint`` are returned exactly as matched.
     """
     body = match.group(3)
     if match.group(1) != "#":
         # Named reference.
         codepoint = name2codepoint.get(body)
         if not codepoint:
             return match.group()
         return unichr(codepoint)
     # Numeric reference, decimal or hexadecimal.
     if match.group(2) == '':
         return unichr(int(body))
     return unichr(int('0x' + body, 16))

Example #6

0

Show file

File: plugin.py Project: kytvi2p/Supybot-plugins-1

 def substitute_entity(match):
     """Turn one matched entity reference into the character it denotes.

     Numeric references (group 1 is ``"#"``) are parsed as decimal when
     group 2 is empty and as hexadecimal otherwise.  Named references are
     looked up in ``name2codepoint``; unknown names are returned as matched.
     """
     ent = match.group(3)
     if match.group(1) == "#":
         if match.group(2) == '':
             number = int(ent)
         else:
             number = int('0x' + ent, 16)
         return unichr(number)
     cp = name2codepoint.get(ent)
     if cp:
         return unichr(cp)
     return match.group()

Example #7

0

Show file

def decode_entity(match):
    """Decode one matched entity reference into its character.

    Group 1 of ``match`` holds the reference body: ``#x`` followed by hex
    digits, ``#`` followed by decimal digits, or an entity name.  A name
    that is not in ``name2codepoint`` is returned exactly as matched.
    """
    what = match.group(1)
    if what.startswith('#x'):
        what = int(what[2:], 16)
    elif what.startswith('#'):
        what = int(what[1:])
    else:
        from html.entities import name2codepoint
        what = name2codepoint.get(what)
        if what is None:
            # Unknown name: uchr() needs an integer, so handing it the
            # matched text would raise TypeError. Keep the text instead.
            return match.group(0)
    return uchr(what)

Example #8

0

Show file

 def subst_entity(match):
     """Return the character for one matched entity reference.

     When group 1 is ``'#'`` group 2 is parsed as a decimal code point;
     otherwise group 2 is looked up in ``n2cp``.  Names without a code point
     are returned exactly as matched.
     """
     body = match.group(2)
     if match.group(1) == '#':
         return unichr(int(body))
     codepoint = n2cp.get(body)
     return unichr(codepoint) if codepoint else match.group()

Example #9

0

Show file

File: package_index.py Project: drbazzi/cs169

def decode_entity(match):
    """Return the character denoted by one matched entity reference.

    ``match.group(1)`` is the reference body.  ``#x...`` is parsed as
    hexadecimal, ``#...`` as decimal, and anything else is looked up as an
    entity name.  Unknown names are passed through as the matched text.
    """
    what = match.group(1)
    if what.startswith('#x'):
        return uchr(int(what[2:], 16))
    if what.startswith('#'):
        return uchr(int(what[1:]))

    from html.entities import name2codepoint
    codepoint = name2codepoint.get(what)
    if codepoint is None:
        # Falling back to the matched text must bypass uchr(): it only
        # accepts integers and would raise TypeError on a string.
        return match.group(0)
    return uchr(codepoint)

Example #10

0

Show file

def _substitute_entity(m):
    ent = m.group(2)
    if m.group(1) == "#":
        return chr(int(ent))
    else:
        cp = name2codepoint.get(ent)
        if cp:
            return chr(cp)
        else:
            return m.group()

Example #11

0

Show file

File: mediawiki_in.py Project: sahwar/moin

 def inline_entity_repl(self, stack, entity):
     """Append the character denoted by ``entity`` to ``stack``.

     ``entity`` is indexed positionally: a ``#`` at index 1 marks a numeric
     reference, an ``x`` at index 2 marks hexadecimal digits, and the first
     and last characters are always stripped.  Names missing from
     ``name2codepoint`` resolve to U+FFFE.
     """
     if entity[1] != '#':
         codepoint = name2codepoint.get(entity[1:-1], 0xfffe)
     elif entity[2] == 'x':
         codepoint = int(entity[3:-1], 16)
     else:
         codepoint = int(entity[2:-1], 10)
     stack.top_append(chr(codepoint))

Example #12

0

Show file

File: scrapemark.py Project: bsidhom/python3-scrapemark

def _substitute_entity(m):
    ent = m.group(2)
    if m.group(1) == "#":
        return chr(int(ent))
    else:
        cp = name2codepoint.get(ent)
        if cp:
            return chr(cp)
        else:
            return m.group()

Example #13

0

Show file

File: HeadlineNewsGramplet.py Project: dbareis/DB_gramps_addons-source

def substitute(match):
    """Return the character for one matched entity reference.

    Group 1 equal to ``"#"`` means group 2 holds a decimal code point;
    otherwise group 2 is an entity name resolved through ``n2cp``.  Names
    without a code point are returned exactly as matched.
    """
    body = match.group(2)
    if match.group(1) == "#":
        codepoint = int(body)
    else:
        codepoint = n2cp.get(body)
        if not codepoint:
            return match.group()
    return chr(codepoint)

Example #14

0

Show file

File: http.py Project: bytebit-ch/uguubot

 def substitute_entity(match):
     """Return the character for one matched entity reference.

     A ``"#"`` in group 1 marks a decimal numeric reference with its digits
     in group 2.  Any other reference is treated as a name and looked up in
     ``html.entities.name2codepoint``; unknown names are returned as matched.
     """
     from html.entities import name2codepoint as n2cp
     body = match.group(2)
     if match.group(1) != "#":
         cp = n2cp.get(body)
         return chr(cp) if cp else match.group()
     return chr(int(body))

Example #15

0

Show file

    def substitute_entity(match):
        """Return the character for one matched entity reference.

        Group 1 equal to ``"#"`` selects numeric substitution of the digits
        in group 2; otherwise group 2 is a name resolved through ``n2cp``.
        """
        ent = match.group(2)
        if match.group(1) == "#":
            # Numeric reference: the digits are the code point itself.
            return chr(int(ent))
        # Named reference: look the code point up by name.
        cp = n2cp.get(ent)
        # No code point found means the match is handed back untouched.
        return chr(cp) if cp else match.group()

Example #16

0

Show file

File: tibiacom.py Project: pombredanne/anacrolix

    def substitute_entity(match):
        """Map one matched entity reference to its character.

        Named references are resolved through ``n2cp`` and returned
        untouched when no code point is found.  References whose group 1 is
        ``"#"`` are decoded from the decimal digits in group 2.
        """
        body = match.group(2)
        if match.group(1) != "#":
            # Named reference.
            codepoint = n2cp.get(body)
            if not codepoint:
                # Unknown name: keep the original text.
                return match.group()
            return chr(codepoint)
        # Numeric reference.
        return chr(int(body))

Example #17

0

Show file

    def substitute_entity(match):
        """Return the replacement for one matched entity reference.

        Group 1 equal to ``"#"`` marks a decimal numeric reference whose
        digits are in group 2; otherwise group 2 is an entity name.  Names
        without a code point are returned exactly as matched.

        The decoded character is UTF-8 encoded.  When ``PY3`` is true the
        encoded bytes are decoded back to text, so every branch returns the
        same string type as ``match.group()`` for a text match.
        """
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp

        def _render(codepoint):
            # On Python 3 .encode() yields bytes, which re.sub() refuses as
            # a replacement for a str pattern; turn it back into text there.
            text = unichr(codepoint).encode('utf-8')
            if PY3 and isinstance(text, bytes):
                text = text.decode('utf-8')
            return text

        ent = match.group(2)
        if match.group(1) == "#":
            return _render(int(ent))
        cp = n2cp.get(ent)
        if cp:
            return _render(cp)
        return match.group()

Example #18

0

Show file

File: HeadlineNewsGramplet.py Project: SNoiraud/addons-source

def substitute(match):
    """Return the character for one matched entity reference.

    Group 1 equal to ``"#"`` means group 2 holds a decimal code point;
    otherwise group 2 is an entity name resolved through ``n2cp``.  Names
    without a code point are returned exactly as matched.

    ``unichr`` is tried first and ``chr`` is used when that name does not
    exist.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            return unichr(int(ent))
        except NameError:
            # Only the missing-name case needs the fallback; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            return chr(int(ent))
    cp = n2cp.get(ent)
    if not cp:
        return match.group()
    try:
        return unichr(cp)
    except NameError:
        return chr(cp)

Example #19

0

Show file

File: utils.py Project: wsgan001/snk.dev-assistant

 def substitute_entity(match):
     """Return the character for one matched entity reference.

     Group 1 equal to ``"#"`` marks a numeric reference; group 2 is then
     ``''`` for decimal or ``'x'``/``'X'`` for hexadecimal, and group 3
     holds the digits.  Otherwise group 3 is an entity name resolved through
     ``n2cp``.  Unknown names and unrecognised radix markers are returned
     exactly as matched.
     """
     ent = match.group(3)
     if match.group(1) == "#":
         # decoding by number
         if match.group(2) == '':
             # number is in decimal
             return unichr(int(ent))
         if match.group(2) in ('x', 'X'):
             # number is in hex (either case of the marker)
             return unichr(int('0x' + ent, 16))
         # Any other marker: keep the text. Falling through would return
         # None, which re.sub() rejects as a replacement.
         return match.group()
     # they were using a name
     cp = n2cp.get(ent)
     if cp:
         return unichr(cp)
     return match.group()

Example #20

0

Show file

File: utils.py Project: biddyweb/gensim

 def substitute_entity(match):
     """Map one matched entity reference to its character.

     Numeric references (group 1 is ``"#"``) are decoded from the digits in
     group 3, as decimal when group 2 is empty and as hexadecimal when it is
     ``'x'`` or ``'X'``.  Named references are resolved through ``n2cp``.
     Whatever cannot be resolved is returned exactly as matched, so the
     function always returns a string.
     """
     ent = match.group(3)
     if match.group(1) != "#":
         # they were using a name
         cp = n2cp.get(ent)
         return unichr(cp) if cp else match.group()

     # decoding by number
     radix_marker = match.group(2)
     if radix_marker == '':
         # number is in decimal
         return unichr(int(ent))
     if radix_marker in ('x', 'X'):
         # number is in hex; the upper-case marker used to fall through
         return unichr(int('0x' + ent, 16))
     # Unknown marker: returning None here would make re.sub() fail.
     return match.group()

Example #21

0

Show file

File: tools.py Project: kconst/genmaybot

def substitute_entity(match):
    """Return the character for one matched entity reference.

    Group 1 equal to ``"#"`` marks a numeric reference; group 2 is ``''``
    for decimal or ``'x'``/``'X'`` for hexadecimal, and group 3 holds the
    digits.  Otherwise group 3 is an entity name resolved through ``n2cp``.
    Unknown names and unrecognised radix markers are returned as matched.

    Any error raised while decoding yields an empty string.
    """
    try:
        ent = match.group(3)

        if match.group(1) == "#":
            if match.group(2) == '':
                return chr(int(ent))
            if match.group(2) in ('x', 'X'):
                return chr(int('0x' + ent, 16))
            # Never fall through to an implicit None: re.sub() rejects it.
            return match.group()

        cp = n2cp.get(ent)
        if cp:
            return chr(cp)
        return match.group()
    except Exception:
        # Deliberate best effort: undecodable references are dropped.
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # still propagate.
        return ""
Example #22

0

Show file

File: tools.py Project: loldi/palbot

def substitute_entity(match):
    """Map one matched entity reference to its character.

    Numeric references (group 1 is ``"#"``) are decoded from the digits in
    group 3: decimal when group 2 is empty, hexadecimal when it is ``'x'``
    or ``'X'``.  Named references are resolved through ``n2cp``.  Text that
    cannot be resolved is returned as matched, and any error raised while
    decoding produces ``""``.
    """
    try:
        ent = match.group(3)

        if match.group(1) != "#":
            cp = n2cp.get(ent)
            return chr(cp) if cp else match.group()

        radix_marker = match.group(2)
        if radix_marker == '':
            return chr(int(ent))
        if radix_marker in ('x', 'X'):
            return chr(int('0x' + ent, 16))
        # An unrecognised marker used to return None implicitly, which
        # re.sub() cannot use as a replacement.
        return match.group()
    except Exception:
        # Best-effort decoding; a bare except here would also have
        # swallowed KeyboardInterrupt and SystemExit.
        return ""

Example #23

0

Show file

File: scrapertools.py Project: jswxjj/addon

    def substitute_entity(match):
        """Return the replacement for one matched entity reference.

        Group 1 equal to ``"#"`` marks a decimal numeric reference whose
        digits are in group 2; otherwise group 2 is an entity name.  The
        resolved character is UTF-8 encoded and, when ``PY3`` is true,
        decoded back to text.  Names without a code point are returned as
        matched.
        """
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp

        def _render(codepoint):
            # Shared by both branches: encode, then undo it on Python 3.
            text = unichr(codepoint).encode('utf-8')
            if PY3 and isinstance(text, bytes):
                text = text.decode("utf-8")
            return text

        body = match.group(2)
        if match.group(1) == "#":
            return _render(int(body))
        codepoint = n2cp.get(body)
        if not codepoint:
            return match.group()
        return _render(codepoint)

Example #24

0

Show file

 def substitute_entity(match):
     """Return the character for one matched entity reference.

     Group 1 equal to ``"#"`` marks a numeric reference; group 2 is ``''``
     for decimal or ``'x'``/``'X'`` for hexadecimal, and group 3 holds the
     digits.  Otherwise group 3 is an entity name resolved through ``n2cp``.
     Characters are produced by ``safe_unichr``.  Unknown names, and any
     reference that raises while decoding, are returned as matched.
     """
     try:
         body = match.group(3)
         if match.group(1) != "#":
             # they were using a name
             codepoint = n2cp.get(body)
             return safe_unichr(codepoint) if codepoint else match.group()
         # decoding by number
         radix_marker = match.group(2)
         if radix_marker == '':
             # number is in decimal
             return safe_unichr(int(body))
         if radix_marker in ['x', 'X']:
             # number is in hex
             return safe_unichr(int(body, 16))
         # any other marker falls through and yields None, as before
     except Exception:
         # in case of errors, return original input
         return match.group()

Example #25

0

Show file

File: utils.py Project: AmitShah/gensim

 def substitute_entity(match):
     """Return the character for one matched entity reference.

     Group 1 equal to ``"#"`` marks a numeric reference; group 2 is ``''``
     for decimal or ``'x'``/``'X'`` for hexadecimal, and group 3 holds the
     digits.  Otherwise group 3 is an entity name resolved through ``n2cp``.
     Characters are produced by ``safe_unichr``.  Unknown names, and any
     reference that raises an ``Exception`` while decoding, are returned
     exactly as matched.
     """
     try:
         ent = match.group(3)
         if match.group(1) == "#":
             # decoding by number
             if match.group(2) == '':
                 # number is in decimal
                 return safe_unichr(int(ent))
             elif match.group(2) in ['x', 'X']:
                 # number is in hex
                 return safe_unichr(int(ent, 16))
         else:
             # they were using a name
             cp = n2cp.get(ent)
             if cp:
                 return safe_unichr(cp)
             else:
                 return match.group()
     except Exception:
         # in case of errors, return original input; a bare except would
         # also have swallowed KeyboardInterrupt and SystemExit
         return match.group()

Example #26

0

Show file

File: utils.py Project: ianatha/GAM

 def handle_entityref(self, name):
     """Append the text for the named entity ``name`` to ``self.__text``.

     Names with a code point in ``name2codepoint`` contribute that
     character; any other name contributes ``'&'`` followed by the name.
     """
     codepoint = name2codepoint.get(name)
     piece = chr(codepoint) if codepoint else '&' + name
     self.__text.append(piece)

Example #27

0

Show file

File: connection.py Project: CityScope/CSL_Guadalajara

def decode_xml_replacer(match):
  """Return the character for one matched XML entity reference.

  Group 1 of ``match`` is the reference body.  A body starting with ``#``
  is a numeric reference; anything else is looked up in
  ``name2codepoint``, and names that are not found yield ``'?'``.
  """
  name = match.group(1)
  if name.startswith("#"):
    digits = name[1:]
    # Accept the standard "#x41" spelling too; it used to raise ValueError.
    if digits[:1] in ("x", "X"):
      digits = digits[1:]
    # NOTE(review): digits without an "x" marker are parsed as hexadecimal,
    # not decimal as in standard XML. Kept as-is for compatibility; confirm
    # against whatever produces these references.
    return chr(int(digits, 16))
  codepoint = name2codepoint.get(name)
  if codepoint is None:
    # chr() only accepts integers, so the '?' placeholder is returned
    # directly instead of being passed through it.
    return '?'
  return chr(codepoint)

Example #28

0

Show file

File: htmlps.py Project: onia/translate

 def handle_entityref(self, name):
     """Handle named entities of the form &aaaa; e.g. &rsquo;

     ``gt``, ``lt`` and ``amp`` are forwarded to ``handle_data`` in their
     escaped ``&name;`` form.  Other names found in ``name2codepoint`` are
     forwarded as the decoded character, and unknown names are forwarded as
     the original ``&name;`` text.
     """
     if name in ['gt', 'lt', 'amp']:
         self.handle_data("&%s;" % name)
         return
     codepoint = name2codepoint.get(name)
     if codepoint is None:
         # Unknown entity: pass the reference through unchanged. Feeding
         # the fallback string to chr() would raise TypeError.
         self.handle_data("&%s;" % name)
     else:
         self.handle_data(chr(codepoint))

Example #29

0

Show file

File: utils.py Project: jay0lee/GAM

 def handle_entityref(self, name):
   """Append the text for the named entity ``name`` to ``self.__text``.

   A name with a code point in ``name2codepoint`` is appended as that
   character; otherwise ``'&'`` plus the name is appended.
   """
   cp = name2codepoint.get(name)
   if not cp:
     self.__text.append('&' + name)
     return
   self.__text.append(chr(cp))

Example #30

0

Show file

 def handle_entityref(self, name):
     """Handle named entities of the form &aaaa; e.g. &rsquo;

     ``gt``, ``lt`` and ``amp`` stay escaped and are passed to
     ``handle_data`` as ``&name;``.  Every other name is decoded through
     ``name2codepoint`` when possible and passed on as ``&name;`` text when
     the name is unknown.
     """
     escaped = u"&%s;" % name
     if name in ['gt', 'lt', 'amp']:
         self.handle_data("&%s;" % name)
         return
     codepoint = name2codepoint.get(name)
     # The fallback text must bypass chr(), which only accepts integers
     # and would raise TypeError on a string.
     self.handle_data(escaped if codepoint is None else chr(codepoint))

Example #31

0

Show file

 def char_from_entity(match):
     """Return the character for the entity name in group 1 of ``match``.

     Names missing from ``name2codepoint`` map to U+FFFD.
     """
     entity_name = match.group(1)
     return chr(name2codepoint.get(entity_name, 0xFFFD))

Example #32

0

Show file

File: __init__.py Project: tonurics/puddletag

def convert_entities(s):
    """Replace decimal and named HTML entity references in ``s``.

    ``&#NNN;`` becomes the character with that decimal code point and
    ``&name;`` becomes the character listed for ``name`` in ``n2cp``.
    References that cannot be resolved (unknown name, code point out of
    range) are left exactly as written.

    Everything is replaced in a single pass, so text produced by one
    substitution is never decoded a second time (``&#38;lt;`` yields
    ``&lt;``, not ``<``).
    """
    def _replace(m):
        digits, name = m.group(1), m.group(2)
        if digits is not None:
            try:
                return chr(int(digits))
            except (ValueError, OverflowError):
                # Not a valid code point: keep the reference text.
                return m.group(0)
        cp = n2cp.get(name)
        return m.group(0) if cp is None else chr(cp)

    # Capture the whole name. The previous "(\w)+" kept only its last
    # letter, so no lookup could ever succeed.
    return re.sub(r'&(?:#(\d+)|(\w+));', _replace, s)

Example #33

0

Show file

 def handle_entityref(self, name):
     """Forward the named entity ``name`` to ``handle_charref`` as a code point.

     The name is looked up in ``name2codepoint``; in the lines visible here,
     nothing is done for names that are not found.
     """
     num = name2codepoint.get(name)
     if num is not None:
         # NOTE(review): num is an int here; confirm handle_charref()
         # accepts an integer rather than the reference text.
         self.handle_charref(num)