Ejemplo n.º 1
0
    def testAllCarrierSymbols(self):
        """Verify that emoji4unicode.xml covers each carrier's set of symbols.

        Verify that we have exactly one round-trip mapping for each carrier's
        symbol.

        For every symbol and every carrier, the value returned by
        GetCarrierUnicode() is collected unless it is empty or starts with
        ">" (such values are not counted as round-trip mappings here).
        Each carrier's collected set must then equal that carrier's
        CarrierData.all_uni set.
        """
        # One set of symbol Unicode code points per carrier.
        carrier_all_uni = {"docomo": set(), "kddi": set(), "softbank": set()}

        # First enumerate all symbols and each symbol's carrier mappings,
        # adding every round-trip mapping to the carrier's set.
        for symbol in emoji4unicode.GetSymbols():
            for carrier, seen in carrier_all_uni.items():
                uni = symbol.GetCarrierUnicode(carrier)
                if not uni or uni.startswith(">"):
                    continue
                # assertFalse replaces the deprecated failIf alias.
                self.assertFalse(
                    uni in seen,
                    "emoji4unicode.xml has two round-trip mappings with "
                    "%s %s" % (carrier, uni))
                seen.add(uni)

        # Then compare each carrier's set to CarrierData.all_uni.
        # They should match.
        for carrier, e4u_set in carrier_all_uni.items():
            cd_set = emoji4unicode.all_carrier_data[carrier].all_uni
            self.assertEqual(
                e4u_set, cd_set, "Mismatched all_uni sets for %s:\n"
                "Missing from emoji4unicode.xml: %s\n"
                "Missing from CarrierData: %s" %
                (carrier, cd_set - e4u_set, e4u_set - cd_set))
Ejemplo n.º 2
0
def main():
    """Print set patterns for code points with emoji-style variation sequences.

    Loads the standardized-variants data and prints, for C++ and Java, the
    pattern produced by SetToUnicodeSetPattern() for the Unicode Standard
    code points that have emoji-style Variation Selector sequences.  Then
    loads the emoji4unicode data and prints the same two patterns for the
    corresponding Google Private Use Area code points.

    Raises:
        ValueError: if the number of standard code points differs from the
            number of collected Google PUA code points.
    """
    standardized_variants.Load()
    emoji_vs_code_points = standardized_variants.GetSetOfUnicodeWithEmojiVS()
    # print("") emits a single blank line under both the Python 2 print
    # statement and the Python 3 print function.
    print("")
    print("Unicode Standard code points with emoji-style "
          "Variation Selector sequences:")
    print("")
    print("C++: " +
          SetToUnicodeSetPattern(emoji_vs_code_points, _EscapeForCpp))
    print("")
    print("Java: " +
          SetToUnicodeSetPattern(emoji_vs_code_points, _EscapeForJava))

    emoji4unicode.Load()
    pua_vs_code_points = set()
    for symbol in emoji4unicode.GetSymbols():
        if not symbol.UnicodeHasVariationSequence():
            continue
        # Get the Google Private Use Area code point.
        pua = symbol.GetCarrierUnicode("google")
        # A missing mapping is skipped instead of crashing on .startswith();
        # the count check at the end still reports the discrepancy.
        # NOTE(review): other callers test for a ">" prefix; confirm that
        # "<" is the intended marker here.
        if pua and not pua.startswith("<"):
            # Round-trip, must be a single code point.
            pua_vs_code_points.add(int(pua, 16))
    print("")
    print("Google PUA code points corresponding to Unicode Standard "
          "code points with emoji-style Variation Selector sequences:")
    print("")
    print("C++: " +
          SetToUnicodeSetPattern(pua_vs_code_points, _EscapeForCpp))
    print("")
    print("Java: " +
          SetToUnicodeSetPattern(pua_vs_code_points, _EscapeForJava))

    if len(emoji_vs_code_points) != len(pua_vs_code_points):
        # Adjacent literals are joined before "%" is applied.  The previous
        # "a" + "b" % (x, y) formatted only the second literal and raised
        # TypeError instead of this ValueError.
        raise ValueError(
            "Mismatch: %d standard code points with VS16 but "
            "%d corresponding Google PUA code points" %
            (len(emoji_vs_code_points), len(pua_vs_code_points)))
 def testSymbolNames(self):
   """Verify the names of proposed symbols against the Unicode name data.

   For each symbol that is in the proposal:
   - if it has a Unicode code point (or a proposed code point whose age is
     at least "6.0"), its name must equal the name recorded for that code
     point in unicode_names, when one is recorded;
   - otherwise its name must not match _INITIAL_DIGIT_RE and must not
     collide with the name of an existing code point.

   All differences and collisions are printed and collected, so a single
   run reports every problem before the test fails.
   """
   cp2n = unicode_names.code_points_to_names
   n2cp = unicode_names.names_to_code_points
   differences = []
   collisions = []
   for symbol in emoji4unicode.GetSymbols():
     if not symbol.in_proposal:
       continue
     name = symbol.GetName()
     uni = symbol.GetUnicode()
     if not uni:
       # The proposal was accepted, the Emoji symbols were added to Unicode 6.0.
       # These are minor changes to deal with Emoji symbols
       # that are now encoded (not just proposed any more),
       # so that we need not modify the .xml data file.
       proposed_uni = symbol.GetProposedUnicode()
       # NOTE(review): this compares version strings lexicographically
       # (presumably GetAge returns text such as "6.0"); an age like
       # "10.0" would sort below "6.0" -- confirm against unicode_age.
       if proposed_uni and unicode_age.GetAge(proposed_uni) >= "6.0":
         uni = proposed_uni
     if uni:
       unicode_name = cp2n.get(uni)
       if unicode_name and (name != unicode_name):
         msg = "name of e-%s %s differs from U+%s %s" % (symbol.id, name,
                                                         uni, unicode_name)
         print(msg)
         differences.append(msg)
     else:
       # assertFalse replaces the deprecated failIf alias.
       self.assertFalse(_INITIAL_DIGIT_RE.search(name),
                        "name %s of e-%s contains a word-initial digit" %
                        (name, symbol.id))
       uni = n2cp.get(name)
       if uni:
         msg = "name of e-%s %s collides with U+%s" % (symbol.id, name, uni)
         print(msg)
         collisions.append(msg)
   self.assertFalse(differences, differences)
   self.assertFalse(collisions, collisions)
Ejemplo n.º 4
0
 def testUniqueNames(self):
     """Verify that names of new symbols are unique.

     Only symbols that are in the proposal and have no Unicode code point
     yet are considered "new"; all others are skipped.
     """
     new_names = set()
     for symbol in emoji4unicode.GetSymbols():
         if not symbol.in_proposal or symbol.GetUnicode():
             continue
         name = symbol.GetName()
         # assertFalse replaces the deprecated failIf alias.
         self.assertFalse(name in new_names, "duplicate name: %s" % name)
         new_names.add(name)
Ejemplo n.º 5
0
 def testSymbolIDs(self):
     """Verify that symbol IDs are unique and well-formed.

     A well-formed ID consists of exactly three uppercase hexadecimal
     digits and nothing else.
     """
     # \Z anchors at the true end of the string; "$" would also accept an
     # ID followed by a trailing newline.
     id_re = re.compile(r"^[0-9A-F]{3}\Z")
     symbol_ids = set()
     for symbol in emoji4unicode.GetSymbols():
         # assertTrue/assertFalse replace the deprecated assert_/failIf.
         self.assertTrue(id_re.match(symbol.id),
                         "Bad symbol ID %s" % symbol.id)
         self.assertFalse(symbol.id in symbol_ids,
                          "Duplicate symbol ID %s" % symbol.id)
         symbol_ids.add(symbol.id)
def _WriteCmapXML(writer):
    """Write the character-to-glyph map XML to writer and close it.

    One <map> element is written for every symbol that has a glyph ID, is in
    the proposal and has no Unicode code point yet.  The elements are sorted
    by (font code point, glyph ID) and wrapped in CMAP_HEADER / CMAP_FOOTER.

    Args:
        writer: object with write() and close() methods; it is closed
            before this function returns.
    """
    entries = []
    for symbol in emoji4unicode.GetSymbols():
        ref_id = symbol.GetGlyphRefID()
        if not ref_id:
            continue
        if not symbol.in_proposal:
            continue
        if symbol.GetUnicode():
            continue
        entries.append((symbol.GetFontUnicode(), ref_id))
    # Sort before any output is produced, as (code point, glyph ID) pairs.
    ordered = sorted(entries)
    writer.write(CMAP_HEADER)
    for font_uni, ref_id in ordered:
        writer.write('    <map charValue="0x%s" glyphRefID="%d"/>\n' %
                     (font_uni, ref_id))
    writer.write(CMAP_FOOTER)
    writer.close()
 def testProposedCodePoints(self):
   """Verify that proposed code points are unique.

   Symbols without a proposed code point, and proposed code points whose
   age is at least "6.0", are skipped.  Each remaining proposed code point
   must not already have a name in unicode_names and must not be proposed
   by more than one symbol.
   """
   cp2n = unicode_names.code_points_to_names
   all_proposed_uni = set()
   for symbol in emoji4unicode.GetSymbols():
     proposed_uni = symbol.GetProposedUnicode()
     if not proposed_uni:
       continue
     # NOTE(review): this compares version strings lexicographically
     # (presumably GetAge returns text such as "6.0"); an age like "10.0"
     # would sort below "6.0" -- confirm against unicode_age.
     if unicode_age.GetAge(proposed_uni) >= "6.0":
       continue
     # assertFalse replaces the deprecated failIf alias.
     self.assertFalse(proposed_uni in cp2n,
                      "e-%s proposed U+%s already taken" %
                      (symbol.id, proposed_uni))
     self.assertFalse(proposed_uni in all_proposed_uni,
                      "e-%s proposed U+%s duplicate" %
                      (symbol.id, proposed_uni))
     all_proposed_uni.add(proposed_uni)
def _WritePostXML(writer):
    """Write the PostScript glyph-name XML to writer and close it.

    One <PostScriptName> element is written per symbol that has a glyph ID,
    sorted by (glyph ID, name) and wrapped in POST_HEADER / POST_FOOTER.
    Symbols that are in the proposal and have no Unicode code point get a
    name of the form "uni<font code point>.<lowercased name with spaces and
    hyphens turned into underscores>"; all other glyphs are named ".notdef".

    Args:
        writer: object with write() and close() methods; it is closed
            before this function returns.
    """
    glyph_names = []
    for symbol in emoji4unicode.GetSymbols():
        ref_id = symbol.GetGlyphRefID()
        if not ref_id:
            continue
        ps_name = ".notdef"
        if symbol.in_proposal and not symbol.GetUnicode():
            slug = symbol.GetName().replace(" ", "_")
            slug = slug.replace("-", "_").lower()
            ps_name = "uni" + symbol.GetFontUnicode() + "." + slug
        glyph_names.append((ref_id, ps_name))
    # Sort before any output is produced, as (glyph ID, name) pairs.
    ordered = sorted(glyph_names)
    writer.write(POST_HEADER)
    for ref_id, ps_name in ordered:
        writer.write('  <PostScriptName glyphRefID="%d" NameString="%s" />\n' %
                     (ref_id, ps_name))
    writer.write(POST_FOOTER)
    writer.close()
Ejemplo n.º 9
0
 def testGlyphIDs(self):
     """Verify that glyph IDs are unique, sufficient and contiguous.

     - Unique: no two symbols share a glyph ID.
     - Sufficient: every symbol that is in the proposal and has no Unicode
       code point has a glyph ID, and every glyph ID is at least 4.
     - Contiguous: the IDs form a gap-free range from the smallest to the
       largest ID.
     """
     glyph_ids = set()
     for symbol in emoji4unicode.GetSymbols():
         glyph_id = symbol.GetGlyphRefID()
         if not glyph_id:
             # Not every symbol has a glyph ID.
             # assertTrue replaces the deprecated assert_ alias.
             self.assertTrue(
                 not symbol.in_proposal or symbol.GetUnicode(),
                 "Missing glyph ID for symbol e-%s "
                 "proposed for new encoding" % symbol.id)
             continue
         self.assertTrue(glyph_id >= 4, "Glyph ID %d less than 4" % glyph_id)
         # assertFalse replaces the deprecated failIf alias.
         self.assertFalse(glyph_id in glyph_ids,
                          "Duplicate glyph ID %d" % glyph_id)
         glyph_ids.add(glyph_id)
     # min()/max() raise ValueError on an empty set; report that case as an
     # ordinary test failure instead of an error.
     self.assertTrue(glyph_ids, "No glyph IDs found")
     min_glyph_id = min(glyph_ids)
     max_glyph_id = max(glyph_ids)
     full_set = set(range(min_glyph_id, max_glyph_id + 1))
     self.assertTrue(glyph_ids == full_set,
                     "Missing glyph IDs: %s" % (full_set - glyph_ids))
Ejemplo n.º 10
0
  def testShiftJis(self):
    """Check for source separation with standard Shift-JIS.

    No Unicode unification must be with a character from the JIS X 0208 part of
    Shift-JIS. This part has lead bytes below 0xF0.
    We consider only round-trip mappings because only those map the same
    characters between Unicode and Shift-JIS.
    (Fallbacks go to best-fit *similar* characters.)

    Japanese cell phone carriers encode Emoji symbols with Shift-JIS VDC codes.

    All violations are printed and collected, so a single run reports every
    problem before the test fails.
    """
    ucm = self.__shift_jis_ucm
    errors = []
    for symbol in emoji4unicode.GetSymbols():
      uni = symbol.GetUnicode()
      if not uni or uni not in ucm.round_trip_code_points:
        continue
      shift_jis = ucm.from_unicode.get(uni)
      # String comparison against "F": true when the code's first character
      # sorts below "F".
      # NOTE(review): presumably the code is an uppercase hex string, so
      # this means "lead byte below 0xF0" -- confirm against the UCM reader.
      if shift_jis and shift_jis < "F":
        msg = ("source separation error: e-%s = U+%s = Shift-JIS-%s" %
               (symbol.id, uni, shift_jis))
        print(msg)
        errors.append(msg)
    # assertFalse replaces the deprecated failIf alias.
    self.assertFalse(errors, errors)
def main():
  """Print statistics on which carrier each new symbol's image comes from.

  Only symbols that are in the proposal and have no Unicode code point are
  counted.  For images taken from DoCoMo symbols numbered 300 or above
  (Expansion Pictograms), one line per symbol is printed that says whether
  KDDI and/or SoftBank round-trip mappings are available.  Summary counts
  are printed at the end.
  """
  emoji4unicode.Load()
  docomo_data = emoji4unicode.all_carrier_data["docomo"]
  img_from_counts = {"docomo":0, "kddi":0, "softbank":0, "google":0}
  expansion_total = 0
  expansion_only = 0
  for symbol in emoji4unicode.GetSymbols():
    if not symbol.in_proposal or symbol.GetUnicode():
      continue
    source = symbol.ImageFromWhichCarrier()
    img_from_counts[source] += 1
    if source != "docomo":
      continue
    docomo_symbol = docomo_data.SymbolFromUnicode(
        symbol.GetCarrierUnicode("docomo"))
    if docomo_symbol.number >= 300:  # Expansion Pictogram
      expansion_total += 1
      # A carrier counts as available only when it has a mapping that does
      # not start with ">".
      kddi_uni = symbol.GetCarrierUnicode("kddi")
      kddi_ok = bool(kddi_uni and not kddi_uni.startswith(">"))
      softbank_uni = symbol.GetCarrierUnicode("softbank")
      softbank_ok = bool(softbank_uni and not softbank_uni.startswith(">"))
      line = "e-%s img_from=docomo" % symbol.id
      if kddi_ok or softbank_ok:
        if kddi_ok:
          line += ", kddi available"
        if softbank_ok:
          line += ", softbank available"
      else:
        line += " Expansion Pictogram only"
        expansion_only += 1
      print(line)
  print("Number of symbol images from which carrier:")
  print(img_from_counts)
  print("Number of symbol images from DoCoMo Expansion Pictograms: %d" %
        expansion_total)
  print("Number of these symbol images where there are no KDDI or SoftBank "
        "round-trip mappings: %d" %
        expansion_only)
Ejemplo n.º 12
0
 def testSymbolNames(self):
     """Verify symbol names against the Unicode name data.

     For a symbol with a Unicode code point, its name must equal the name
     recorded for that code point in unicode_names, when one is recorded.
     For a symbol without a Unicode code point, its name must not collide
     with the name of an existing code point.

     All differences and collisions are printed and collected, so a single
     run reports every problem before the test fails.
     """
     cp2n = unicode_names.code_points_to_names
     n2cp = unicode_names.names_to_code_points
     differences = []
     collisions = []
     for symbol in emoji4unicode.GetSymbols():
         name = symbol.GetName()
         uni = symbol.GetUnicode()
         if uni:
             unicode_name = cp2n.get(uni)
             if unicode_name and (name != unicode_name):
                 msg = "name of e-%s %s differs from U+%s %s" % (
                     symbol.id, name, uni, unicode_name)
                 print(msg)
                 differences.append(msg)
             continue
         existing_uni = n2cp.get(name)
         if existing_uni:
             msg = "name of e-%s %s collides with U+%s" % (
                 symbol.id, name, existing_uni)
             print(msg)
             collisions.append(msg)
     # assertFalse replaces the deprecated failIf alias.
     self.assertFalse(differences, differences)
     self.assertFalse(collisions, collisions)