Esempi in Python per get, esempi in Python per html.entities.name2codepoint.get

Esempio n. 1

0

Mostra file

File: html.py Progetto: zanachka/w3lib

    def convert_entity(m):
        """Return the replacement text for one matched HTML entity.

        ``m`` is a regex match exposing the named groups ``dec``, ``hex``,
        ``named`` and ``semicolon``.  Named entities whose lower-cased name is
        in ``keep`` are returned untouched.  When no code point can be
        produced, the matched text is returned untouched, or the empty string
        if ``remove_illegal`` is set and the entity ended with a semicolon.
        """
        groups = m.groupdict()
        # Start from "no code point" so that a match with none of the
        # dec/hex/named groups set reaches the fallback below instead of
        # raising UnboundLocalError.
        number = None
        if groups.get('dec'):
            number = int(groups['dec'], 10)
        elif groups.get('hex'):
            number = int(groups['hex'], 16)
        elif groups.get('named'):
            entity_name = groups['named']
            if entity_name.lower() in keep:
                return m.group(0)
            # Try the exact spelling first, then the lower-cased one.
            number = (name2codepoint.get(entity_name)
                      or name2codepoint.get(entity_name.lower()))
        if number is not None:
            # Numeric character references in the 80-9F range are typically
            # interpreted by browsers as representing the characters mapped
            # to bytes 80-9F in the Windows-1252 encoding. For more info
            # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
            try:
                if 0x80 <= number <= 0x9f:
                    return bytes((number, )).decode('cp1252')
                return chr(number)
            except ValueError:
                # Undefined cp1252 byte or out-of-range code point: fall
                # through to the "illegal entity" handling below.
                pass

        return '' if remove_illegal and groups.get('semicolon') else m.group(0)

Esempio n. 2

0

Mostra file

File: html.py Progetto: rrosajp/w3lib

    def convert_entity(m: Match) -> str:
        """Regex callback that decodes a single HTML entity match.

        Reads the ``dec``, ``hex``, ``named`` and ``semicolon`` groups of
        ``m``.  Named entities listed in ``keep`` (compared lower-cased) are
        returned unchanged.  When no usable code point is found, the matched
        text is returned unchanged, or the empty string if ``remove_illegal``
        is set and the entity ended with a semicolon.
        """
        parts = m.groupdict()
        codepoint = None

        if parts.get("dec"):
            codepoint = int(parts["dec"], 10)
        elif parts.get("hex"):
            codepoint = int(parts["hex"], 16)
        elif parts.get("named"):
            name = parts["named"]
            lowered = name.lower()
            if lowered in keep:
                return m.group(0)
            # Exact spelling first, lower-cased spelling as a fallback.
            codepoint = name2codepoint.get(name)
            if not codepoint:
                codepoint = name2codepoint.get(lowered)

        if codepoint is not None:
            # Browsers typically read numeric references in the 80-9F range
            # as the characters that Windows-1252 maps to bytes 80-9F. See:
            # http://en.wikipedia.org/wiki/Character_encodings_in_HTML
            try:
                if codepoint < 0x80 or codepoint > 0x9F:
                    return chr(codepoint)
                return bytes((codepoint,)).decode("cp1252")
            except ValueError:
                # Not decodable: handled as an illegal entity below.
                pass

        if remove_illegal and parts.get("semicolon"):
            return ""
        return m.group(0)

Esempio n. 3

0

Mostra file

    def convert_entity(m):
        """Return the replacement text for one matched entity.

        A non-empty group 1 marks a numeric reference, a non-empty group 2
        marks that number as hexadecimal, and group 3 holds the entity body.
        Named entities found in ``keep`` are returned unchanged.  When no
        character can be produced the result is the empty string if
        ``remove_illegal`` is set, otherwise the matched text.
        """
        entity_body = m.group(3)
        if m.group(1):
            try:
                if m.group(2):
                    number = int(entity_body, 16)
                else:
                    number = int(entity_body, 10)
                # Numeric character references in the 80-9F range are typically
                # interpreted by browsers as representing the characters mapped
                # to bytes 80-9F in the Windows-1252 encoding. For more info
                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
                if 0x80 <= number <= 0x9f:
                    # str has no .decode() on Python 3, so build the single
                    # byte explicitly before decoding it as Windows-1252.
                    return bytes((number,)).decode('cp1252')
            except ValueError:
                # Covers unparsable digits and bytes undefined in cp1252
                # (UnicodeDecodeError is a ValueError subclass).
                number = None
        else:
            if entity_body in keep:
                return m.group(0)
            number = name2codepoint.get(entity_body)
        if number is not None:
            try:
                return chr(number)
            except ValueError:
                # Code point outside the valid range: treat as illegal.
                pass

        return '' if remove_illegal else m.group(0)

Esempio n. 4

0

Mostra file

File: text.py Progetto: timvieira/skid

    def convert_entity(m):
        """Decode one matched entity into its character.

        Group 1 is non-empty for numeric references, group 2 is non-empty
        when that number is hexadecimal, and group 3 is the entity body.
        Named entities present in ``keep`` are returned as matched.  If no
        character can be produced, the empty string is returned when
        ``remove_illegal`` is set and the matched text otherwise.
        """
        entity_body = m.group(3)
        if m.group(1):
            try:
                base = 16 if m.group(2) else 10
                number = int(entity_body, base)
                # Numeric character references in the 80-9F range are typically
                # interpreted by browsers as representing the characters mapped
                # to bytes 80-9F in the Windows-1252 encoding. For more info
                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
                if 0x80 <= number <= 0x9f:
                    # On Python 3 chr() yields str, which cannot be decoded;
                    # decode the raw byte as Windows-1252 instead.
                    return bytes((number,)).decode('cp1252')
            except ValueError:
                # Bad digits or a byte with no cp1252 mapping
                # (UnicodeDecodeError derives from ValueError).
                number = None
        else:
            if entity_body in keep:
                return m.group(0)
            number = name2codepoint.get(entity_body)
        if number is not None:
            try:
                return chr(number)
            except ValueError:
                # Out-of-range code point: fall back to the illegal handling.
                pass

        return '' if remove_illegal else m.group(0)

Esempio n. 5

0

Mostra file

File: plugin.py Progetto: AlanBell/Supybot-plugins

 def substitute_entity(match):
     """Regex callback: replace one matched entity with its character.

     Group 1 equal to "#" selects the numeric form; an empty group 2 means
     the number in group 3 is decimal, anything else means hexadecimal.
     Otherwise group 3 is looked up as an entity name, and names without a
     known code point are returned as matched.
     """
     body = match.group(3)
     if match.group(1) != "#":
         # Named entity.
         codepoint = name2codepoint.get(body)
         if codepoint:
             return unichr(codepoint)
         return match.group()
     # Numeric entity, decimal or hexadecimal.
     if match.group(2) == '':
         return unichr(int(body))
     return unichr(int('0x' + body, 16))

Esempio n. 6

0

Mostra file

File: plugin.py Progetto: kytvi2p/Supybot-plugins-1

 def substitute_entity(match):
     """Turn a matched entity into the character it stands for.

     When group 1 is "#", group 3 is a number: decimal if group 2 is empty,
     hexadecimal otherwise.  In every other case group 3 is an entity name;
     a name with no code point leaves the matched text unchanged.
     """
     marker = match.group(1)
     text = match.group(3)
     if marker == "#":
         if match.group(2) == '':
             return unichr(int(text))
         return unichr(int('0x' + text, 16))
     code = name2codepoint.get(text)
     if not code:
         return match.group()
     return unichr(code)

Esempio n. 7

0

Mostra file

def decode_entity(match):
    """Decode the entity captured in group 1 of ``match``.

    ``#x...`` is read as a hexadecimal code point, ``#...`` as a decimal
    one, and anything else as an entity name.  An unknown name returns the
    matched text unchanged.
    """
    what = match.group(1)
    if what.startswith('#x'):
        what = int(what[2:], 16)
    elif what.startswith('#'):
        what = int(what[1:])
    else:
        from html.entities import name2codepoint
        what = name2codepoint.get(what)
        if what is None:
            # Unknown name: handing the matched string to uchr() would raise
            # TypeError, so pass the original text through instead.
            return match.group(0)
    return uchr(what)

Esempio n. 8

0

Mostra file

 def subst_entity(match):
     """Regex callback mapping one entity match to its character.

     Group 1 equal to '#' means group 2 is a decimal code point; otherwise
     group 2 is an entity name looked up in ``n2cp``.  Names without a code
     point leave the matched text unchanged.
     """
     body = match.group(2)
     if match.group(1) != '#':
         codepoint = n2cp.get(body)
         return unichr(codepoint) if codepoint else match.group()
     return unichr(int(body))

Esempio n. 9

0

Mostra file

File: package_index.py Progetto: drbazzi/cs169

def decode_entity(match):
    """Return the character for the entity captured in group 1 of ``match``.

    A ``#x`` prefix selects a hexadecimal code point, a bare ``#`` prefix a
    decimal one; everything else is treated as an entity name.  Names that
    are not known are left exactly as matched.
    """
    what = match.group(1)
    if what.startswith('#x'):
        codepoint = int(what[2:], 16)
    elif what.startswith('#'):
        codepoint = int(what[1:])
    else:
        from html.entities import name2codepoint
        codepoint = name2codepoint.get(what)
        if codepoint is None:
            # Falling back to the matched string and passing it to uchr()
            # raises TypeError; return the original text instead.
            return match.group(0)
    return uchr(codepoint)

Esempio n. 10

0

Mostra file

def _substitute_entity(m):
    ent = m.group(2)
    if m.group(1) == "#":
        return chr(int(ent))
    else:
        cp = name2codepoint.get(ent)
        if cp:
            return chr(cp)
        else:
            return m.group()

Esempio n. 11

0

Mostra file

File: mediawiki_in.py Progetto: sahwar/moin

 def inline_entity_repl(self, stack, entity):
     """Append the character for ``entity`` to ``stack``.

     ``entity`` is sliced as a full reference: the first and last characters
     are skipped, a '#' in second position marks a numeric reference, and a
     following 'x' marks it as hexadecimal.  Names without a known code
     point are appended as U+FFFE.
     """
     if entity[1] != '#':
         codepoint = name2codepoint.get(entity[1:-1], 0xfffe)
     elif entity[2] == 'x':
         codepoint = int(entity[3:-1], 16)
     else:
         codepoint = int(entity[2:-1], 10)
     stack.top_append(chr(codepoint))

Esempio n. 12

0

Mostra file

File: scrapemark.py Progetto: bsidhom/python3-scrapemark

def _substitute_entity(m):
    ent = m.group(2)
    if m.group(1) == "#":
        return chr(int(ent))
    else:
        cp = name2codepoint.get(ent)
        if cp:
            return chr(cp)
        else:
            return m.group()

Esempio n. 13

0

Mostra file

File: HeadlineNewsGramplet.py Progetto: dbareis/DB_gramps_addons-source

def substitute(match):
    """Regex callback that decodes one entity match.

    Group 1 equal to "#" means group 2 holds a decimal code point; otherwise
    group 2 is an entity name resolved through ``n2cp``.  When the name has
    no code point the matched text is returned as is.
    """
    payload = match.group(2)
    if match.group(1) != "#":
        codepoint = n2cp.get(payload)
        if not codepoint:
            return match.group()
        return chr(codepoint)
    return chr(int(payload))

Esempio n. 14

0

Mostra file

File: http.py Progetto: bytebit-ch/uguubot

 def substitute_entity(match):
     """Decode the entity matched by ``match`` into a character.

     Group 1 equal to "#" means group 2 is a decimal code point; otherwise
     group 2 is an entity name.  A name with no known code point returns
     the matched text unchanged.
     """
     from html.entities import name2codepoint as n2cp
     token = match.group(2)
     if match.group(1) != "#":
         code = n2cp.get(token)
         return chr(code) if code else match.group()
     return chr(int(token))

Esempio n. 15

0

Mostra file

    def substitute_entity(match):
        """Regex callback: map one entity match to its character.

        Group 1 equal to "#" means group 2 is a decimal code point; otherwise
        group 2 is an entity name resolved through ``n2cp``.  Names without a
        code point return the matched text untouched.
        """
        token = match.group(2)
        if match.group(1) != "#":
            # Named entity: resolve the name to a code point.
            codepoint = n2cp.get(token)
            # No code point found: hand the match back untouched.
            return chr(codepoint) if codepoint else match.group()
        # Numeric substitution.
        return chr(int(token))

Esempio n. 16

0

Mostra file

File: tibiacom.py Progetto: pombredanne/anacrolix

    def substitute_entity(match):
        """Return the character that the matched entity stands for.

        With "#" in group 1, group 2 is read as a decimal code point.
        Otherwise group 2 is an entity name looked up in ``n2cp``; if that
        lookup yields nothing, the matched text is returned unchanged.
        """
        numeric = match.group(1) == "#"
        body = match.group(2)
        if numeric:
            # Numeric substitution.
            return chr(int(body))
        # Look the code point up by name.
        code = n2cp.get(body)
        if not code:
            # Unknown name: return the match untouched.
            return match.group()
        return chr(code)

Esempio n. 17

0

Mostra file

    def substitute_entity(match):
        """Regex callback: decode one matched entity to the native string type.

        Group 1 equal to "#" means group 2 is a decimal code point; otherwise
        group 2 is an entity name.  Names without a known code point return
        the matched text unchanged.  Decoded characters are returned as
        UTF-8 bytes when ``PY3`` is false and as text when it is true.
        """
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp

        def to_native(codepoint):
            # With PY3 set, re.sub() on a text pattern rejects a bytes
            # replacement, so the character is returned as text there.
            # Without it, the UTF-8 encoded form is returned as before.
            char = unichr(codepoint)
            return char if PY3 else char.encode('utf-8')

        ent = match.group(2)
        if match.group(1) == "#":
            return to_native(int(ent))

        cp = n2cp.get(ent)
        if cp:
            return to_native(cp)
        return match.group()

Esempio n. 18

0

Mostra file

File: HeadlineNewsGramplet.py Progetto: SNoiraud/addons-source

def substitute(match):
    """Regex callback: decode one matched entity into a character.

    Group 1 equal to "#" means group 2 is a decimal code point; otherwise
    group 2 is an entity name looked up in ``n2cp``.  Names without a code
    point return the matched text unchanged.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        code = int(ent)
    else:
        code = n2cp.get(ent)
        if not code:
            return match.group()
    try:
        return unichr(code)
    except NameError:
        # unichr is not defined on this interpreter; chr() is used instead.
        # Only NameError is caught so that unrelated errors are not swallowed.
        return chr(code)

Esempio n. 19

0

Mostra file

File: utils.py Progetto: wsgan001/snk.dev-assistant

 def substitute_entity(match):
     """Regex callback: decode one matched entity, leaving bad input untouched.

     Group 1 equal to "#" selects the numeric form, where group 2 is the
     base marker ('' for decimal, 'x'/'X' for hexadecimal) and group 3 the
     digits.  Otherwise group 3 is an entity name looked up in ``n2cp``.
     Unknown names, unrecognised base markers and numbers that cannot be
     converted all return the matched text unchanged.
     """
     ent = match.group(3)
     if match.group(1) != "#":
         # they were using a name
         cp = n2cp.get(ent)
         return unichr(cp) if cp else match.group()

     # decoding by number
     base_marker = match.group(2)
     try:
         if base_marker == '':
             # number is in decimal
             code = int(ent)
         elif base_marker in ('x', 'X'):
             # number is in hex
             code = int('0x' + ent, 16)
         else:
             # Unrecognised marker: keep the text instead of returning None,
             # which re.sub() would treat as an empty replacement.
             return match.group()
         return unichr(code)
     except (ValueError, OverflowError):
         # Malformed digits or a code point outside the valid range.
         return match.group()

Esempio n. 20

0

Mostra file

File: utils.py Progetto: biddyweb/gensim

 def substitute_entity(match):
     """Decode the entity matched by ``match``; return it unchanged on failure.

     When group 1 is "#", group 2 is the base marker ('' for decimal,
     'x' or 'X' for hexadecimal) and group 3 holds the digits.  Otherwise
     group 3 is an entity name resolved through ``n2cp``.  The matched text
     is returned as is for unknown names, unrecognised base markers and
     numbers that cannot be converted to a character.
     """
     ent = match.group(3)
     if match.group(1) == "#":
         # decoding by number
         marker = match.group(2)
         if marker not in ('', 'x', 'X'):
             # Previously fell off the end and returned None, which
             # re.sub() treats as an empty replacement.
             return match.group()
         try:
             # '' means decimal, 'x'/'X' means hex
             code = int(ent) if marker == '' else int('0x' + ent, 16)
             return unichr(code)
         except (ValueError, OverflowError):
             # Digits that do not parse, or a code point out of range.
             return match.group()

     # they were using a name
     cp = n2cp.get(ent)
     if cp:
         return unichr(cp)
     return match.group()

Esempio n. 21

0

Mostra file

File: tools.py Progetto: kconst/genmaybot

def substitute_entity(match):
    """Regex callback: decode one matched entity, dropping undecodable ones.

    Group 1 equal to "#" selects the numeric form, where group 2 is the
    base marker ('' for decimal, 'x'/'X' for hexadecimal) and group 3 the
    digits.  Otherwise group 3 is an entity name looked up in ``n2cp``, and
    unknown names are returned unchanged.  Any error while decoding yields
    the empty string.
    """
    try:
        ent = match.group(3)

        if match.group(1) != "#":
            cp = n2cp.get(ent)
            return chr(cp) if cp else match.group()

        base_marker = match.group(2)
        if base_marker == '':
            return chr(int(ent))
        if base_marker in ('x', 'X'):
            return chr(int('0x' + ent, 16))
        # Unrecognised marker: be explicit about the "drop it" result
        # instead of falling off the end with None.
        return ""
    except Exception:
        # Best-effort decoding: failures drop the entity.  Catching
        # Exception, not everything, lets KeyboardInterrupt and SystemExit
        # propagate.
        return ""

Esempio n. 22

0

Mostra file

File: tools.py Progetto: loldi/palbot

def substitute_entity(match):
    """Decode the entity matched by ``match``; return "" when decoding fails.

    With "#" in group 1, group 2 is the base marker ('' for decimal,
    'x' or 'X' for hexadecimal) and group 3 holds the digits.  Without it,
    group 3 is an entity name resolved through ``n2cp``; an unknown name
    returns the matched text unchanged.
    """
    try:
        ent = match.group(3)

        if match.group(1) == "#":
            marker = match.group(2)
            if marker == '':
                code = int(ent)
            elif marker in ('x', 'X'):
                code = int('0x' + ent, 16)
            else:
                # Explicit empty result instead of an implicit None.
                return ""
            return chr(code)

        cp = n2cp.get(ent)
        if cp:
            return chr(cp)
        return match.group()
    except Exception:
        # Failures drop the entity.  Exception rather than a bare except so
        # that KeyboardInterrupt and SystemExit are not swallowed.
        return ""

Esempio n. 23

0

Mostra file

File: scrapertools.py Progetto: jswxjj/addon

    def substitute_entity(match):
        """Regex callback: decode one matched entity.

        Group 1 equal to "#" means group 2 is a decimal code point; otherwise
        group 2 is an entity name.  Names without a known code point return
        the matched text unchanged.  Decoded characters are UTF-8 encoded and,
        when ``PY3`` is true, decoded back to text.
        """
        if PY3:
            from html.entities import name2codepoint as n2cp
        else:
            from htmlentitydefs import name2codepoint as n2cp

        def _as_native(codepoint):
            # Encode to UTF-8; on PY3 turn the bytes back into text.
            encoded = unichr(codepoint).encode('utf-8')
            if PY3 and isinstance(encoded, bytes):
                return encoded.decode("utf-8")
            return encoded

        token = match.group(2)
        if match.group(1) == "#":
            return _as_native(int(token))

        codepoint = n2cp.get(token)
        if not codepoint:
            return match.group()
        return _as_native(codepoint)

Esempio n. 24

0

Mostra file

 def substitute_entity(match):
     """Regex callback: decode one matched entity, tolerating errors.

     Group 1 equal to "#" selects the numeric form, where group 2 is the
     base marker ('' for decimal, 'x'/'X' for hexadecimal) and group 3 the
     digits.  Otherwise group 3 is an entity name looked up in ``n2cp``.
     Unknown names and any exception return the matched text unchanged.
     """
     try:
         body = match.group(3)
         if match.group(1) != "#":
             # Named entity.
             codepoint = n2cp.get(body)
             if not codepoint:
                 return match.group()
             return safe_unichr(codepoint)
         # Numeric entity.
         base_marker = match.group(2)
         if base_marker == '':
             # Decimal number.
             return safe_unichr(int(body))
         if base_marker in ['x', 'X']:
             # Hexadecimal number.
             return safe_unichr(int(body, 16))
         # Any other marker falls through and yields None.
     except Exception:
         # On error, hand back the original input.
         return match.group()

Esempio n. 25

0

Mostra file

File: utils.py Progetto: AmitShah/gensim

 def substitute_entity(match):
     """Regex callback: decode one matched entity, returning it unchanged on failure.

     Group 1 equal to "#" selects the numeric form, where group 2 is the
     base marker ('' for decimal, 'x'/'X' for hexadecimal) and group 3 the
     digits.  Otherwise group 3 is an entity name looked up in ``n2cp``.
     Unknown names, unrecognised base markers and any error raised while
     decoding return the matched text unchanged.
     """
     try:
         ent = match.group(3)
         if match.group(1) == "#":
             # decoding by number
             if match.group(2) == '':
                 # number is in decimal
                 return safe_unichr(int(ent))
             elif match.group(2) in ['x', 'X']:
                 # number is in hex
                 return safe_unichr(int(ent, 16))
             # Unrecognised base marker: return the input rather than
             # falling off the end with None.
             return match.group()
         else:
             # they were using a name
             cp = n2cp.get(ent)
             if cp:
                 return safe_unichr(cp)
             else:
                 return match.group()
     except Exception:
         # In case of errors, return the original input.  Exception rather
         # than a bare except, so KeyboardInterrupt/SystemExit propagate.
         return match.group()

Esempio n. 26

0

Mostra file

File: utils.py Progetto: ianatha/GAM

 def handle_entityref(self, name):
     """Append the text for the named entity ``name`` to ``self.__text``.

     A name with a known code point contributes that character; any other
     name contributes '&' followed by the name.
     """
     codepoint = name2codepoint.get(name)
     piece = chr(codepoint) if codepoint else '&' + name
     self.__text.append(piece)

Esempio n. 27

0

Mostra file

File: connection.py Progetto: CityScope/CSL_Guadalajara

def decode_xml_replacer(match):
  """Return the character for the entity captured in group 1 of ``match``.

  A leading "#" marks a numeric reference, which is parsed as hexadecimal
  with an optional "x"/"X" prefix.  Anything else is looked up as an entity
  name; unknown names yield "?".
  """
  name = match.group(1)
  if name.startswith("#"):
    digits = name[1:]
    # Accept the "&#xNN;" spelling as well: int("xNN", 16) raises ValueError.
    if digits[:1] in ("x", "X"):
      digits = digits[1:]
    # NOTE(review): a bare "&#NN;" is still read as hexadecimal, exactly as
    # before; standard XML treats that form as decimal -- confirm what the
    # producer of this data emits before changing it.
    return chr(int(digits, 16))
  codepoint = name2codepoint.get(name)
  # chr() only accepts integers, so the "?" placeholder for unknown names is
  # returned directly instead of being passed through chr().
  return '?' if codepoint is None else chr(codepoint)

Esempio n. 28

0

Mostra file

File: htmlps.py Progetto: onia/translate

 def handle_entityref(self, name):
     """Handle named entities of the form &aaaa; e.g. &rsquo;

     ``gt``, ``lt`` and ``amp`` are forwarded to ``handle_data`` still
     escaped.  Other known names are forwarded as the character they stand
     for, and unknown names are forwarded as the raw ``&name;`` text.
     """
     if name in ['gt', 'lt', 'amp']:
         self.handle_data("&%s;" % name)
         return
     codepoint = name2codepoint.get(name)
     if codepoint is None:
         # Unknown entity: pass the reference through as text.  Feeding the
         # "&name;" string to chr() raises TypeError.
         self.handle_data("&%s;" % name)
     else:
         self.handle_data(chr(codepoint))

Esempio n. 29

0

Mostra file

File: utils.py Progetto: jay0lee/GAM

 def handle_entityref(self, name):
   """Record the text for the named entity ``name`` in ``self.__text``.

   Known names are stored as their character; other names are stored as
   '&' followed by the name.
   """
   code = name2codepoint.get(name)
   if not code:
     self.__text.append('&' + name)
     return
   self.__text.append(chr(code))

Esempio n. 30

0

Mostra file

 def handle_entityref(self, name):
     """Handle named entities of the form &aaaa; e.g. &rsquo;

     ``gt``, ``lt`` and ``amp`` reach ``handle_data`` still escaped.  Any
     other name with a known code point reaches it as that character, and
     names without one reach it as the raw ``&name;`` text.
     """
     if name in ['gt', 'lt', 'amp']:
         self.handle_data("&%s;" % name)
     elif name in name2codepoint:
         self.handle_data(chr(name2codepoint[name]))
     else:
         # The old fallback passed the "&name;" string to chr(), which
         # raises TypeError; forward the raw reference instead.
         self.handle_data(u"&%s;" % name)

Esempio n. 31

0

Mostra file

 def char_from_entity(match):
     """Return the character for the entity name in group 1 of ``match``.

     Names without a known code point map to U+FFFD.
     """
     return chr(name2codepoint.get(match.group(1), 0xFFFD))

Esempio n. 32

0

Mostra file

File: __init__.py Progetto: tonurics/puddletag

def convert_entities(s):
    """Replace numeric (``&#NN;``) and named (``&name;``) entities in ``s``.

    Decimal references become the character with that code point and named
    references are resolved through ``n2cp``.  References that cannot be
    resolved (unknown name, code point out of range) are left unchanged.
    Everything is decoded in one pass, so text produced by one replacement
    is never decoded a second time.
    """
    def replace(m):
        number, name = m.group(1), m.group(2)
        if number is not None:
            try:
                return chr(int(number))
            except (ValueError, OverflowError):
                # Code point outside the valid range.
                return m.group(0)
        codepoint = n2cp.get(name)
        return m.group(0) if codepoint is None else chr(codepoint)

    # The whole name is captured: the previous "(\w)+" kept only its last
    # character, and the lookup used the groups tuple as the key.
    return re.sub(r'&(?:#(\d+)|(\w+));', replace, s)

Esempio n. 33

0

Mostra file

 def handle_entityref(self, name):
     # Resolve the named entity to its code point and, when the name is
     # known, forward that number to handle_charref().  In the code visible
     # here an unknown name does nothing.
     # NOTE(review): handle_charref receives an int here; if this class
     # derives from the standard HTMLParser, that hook normally receives a
     # string -- confirm that the handle_charref in use accepts ints.
     num = name2codepoint.get(name)
     if num is not None:
         self.handle_charref(num)