def testMatchesWithSurroundingLatinCharsAndLeadingPunctuation(self):
    """Check numbers that start with punctuation against Latin surroundings.

    The two sample numbers begin with "+" and "(" respectively.  Contexts
    that put text directly after the number are run as possible-but-not-valid
    cases; contexts with only leading text (optionally followed by trailing
    punctuation or white-space) are run as valid cases.
    """
    sample_numbers = ("+14156667777", "(415)6667777")

    # Contexts with trailing characters. Leading characters are okay here
    # since the numbers we will insert start with punctuation, but
    # trailing characters are still not allowed.
    possibleOnlyContexts = [
        NumberContext("abc", "def"),
        NumberContext("", "def"),
        NumberContext("", u("\u00C9")),
    ]
    # Numbers should not be considered valid, if they have trailing Latin
    # characters, but should be considered possible.
    for sample in sample_numbers:
        self.findMatchesInContexts(possibleOnlyContexts, False, True, "US", sample)

    validContexts = [
        NumberContext("abc", ""),
        NumberContext(u("\u00C9"), ""),
        NumberContext(u("\u00C9"), "."),  # Trailing punctuation.
        NumberContext(u("\u00C9"), " def"),  # Trailing white-space.
    ]
    # Numbers should be considered valid, since they start with punctuation.
    for sample in sample_numbers:
        self.findMatchesInContexts(validContexts, True, True, "US", sample)
def testMoneyNotSeenAsPhoneNumber(self):
    """Run the context check with a currency symbol next to the number.

    Every context here places a currency sign immediately before or after
    the number; they are all passed as "possible only" contexts.
    """
    currency_contexts = [
        NumberContext("$", ""),
        NumberContext("", "$"),
        NumberContext(u("\u00A3"), ""),  # Pound sign
        NumberContext(u("\u00A5"), ""),  # Yen sign
    ]
    self.findMatchesInContexts(currency_contexts, False, True)
def testMatchesWithSurroundingChineseChars(self):
    """Run the context check with Chinese text before and/or after the number."""
    chinese_contexts = [
        # Chinese text before the number only.
        NumberContext(u("\u6211\u7684\u7535\u8BDD\u53F7\u7801\u662F"), ""),
        # Chinese text after the number only.
        NumberContext("", u("\u662F\u6211\u7684\u7535\u8BDD\u53F7\u7801")),
        # Chinese text on both sides.
        NumberContext(u("\u8BF7\u62E8\u6253"), u("\u6211\u5728\u660E\u5929")),
    ]
    # Numbers should be considered valid, since they are surrounded by Chinese.
    self.findMatchesInContexts(chinese_contexts, True, True)
def testGetDescriptionForKoreanNumber(self):
    """Check descriptions of the Korean sample numbers in English and Korean."""
    # (expected description, number, language), checked in this order.
    expectations = (
        ("Seoul", KO_NUMBER1, _ENGLISH),
        ("Incheon", KO_NUMBER2, _ENGLISH),
        ("Jeju", KO_NUMBER3, _ENGLISH),
        (u("\uC11C\uC6B8"), KO_NUMBER1, _KOREAN),
        (u("\uC778\uCC9C"), KO_NUMBER2, _KOREAN),
    )
    for expected, number, language in expectations:
        self.assertEqual(expected, description_for_number(number, language))
def testMatchesWithSurroundingLatinChars(self):
    """Run the context check with Latin letters touching the number."""
    latin_contexts = [
        NumberContext("abc", "def"),
        NumberContext("abc", ""),
        NumberContext("", "def"),
        # Latin capital letter e with an acute accent.
        NumberContext(u("\u00C9"), ""),
        # e with an acute accent decomposed (with combining mark).
        NumberContext(u("e\u0301"), ""),
    ]
    # Numbers should not be considered valid, if they are surrounded by
    # Latin characters, but should be considered possible.
    self.findMatchesInContexts(latin_contexts, False, True)
def testIsLatinLetter(self):
    """Check which characters PhoneNumberMatcher._is_latin_letter accepts."""
    accepted = (
        'c',
        'C',
        u("\u00C9"),
        u("\u0301"),  # Combining acute accent
    )
    # Punctuation, digits and white-space are not considered "latin letters".
    rejected = (
        ':',
        '5',
        '-',
        '.',
        ' ',
        u("\u6211"),  # Chinese character
        u("\u306E"),  # Hiragana letter no
    )
    for char in accepted:
        self.assertTrue(PhoneNumberMatcher._is_latin_letter(char))
    for char in rejected:
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(char))
def _expand_formatting_rule(rule, national_prefix):
    """Expand the "$NP" and "$FG" abbreviations in a formatting rule.

    Formatting rules can include the terms "$NP" and "$FG".  These get
    replaced with:
        "$NP" => the national prefix
        "$FG" => the first group, i.e. "$1"

    Arguments:
    rule -- The formatting rule to expand, or None.
    national_prefix -- The text substituted for "$NP"; None is treated as
              the empty string.

    Returns the expanded rule, or None if rule is None.
    """
    if rule is None:
        return None
    if national_prefix is None:
        national_prefix = u("")
    # Both terms are fixed strings, so use plain substring replacement rather
    # than a regular expression.  This inserts national_prefix literally (a
    # regexp replacement template would interpret any backslash in it) and
    # avoids the invalid "\$" escape sequence in a non-raw string literal.
    rule = rule.replace("$NP", national_prefix)
    rule = rule.replace("$FG", "$1")
    return rule
def testFindWithInternationalPrefixes(self):
    """Run doTestFindInContext on one US number written with various prefixes."""
    # (text to search for, region), checked in this order.
    cases = [
        ("+1 (650) 333-6000", "NZ"),
        ("1-650-333-6000", "US"),
        # Calling the US number from Singapore by using different service
        # providers
        # 1st test: calling using SingTel IDD service (IDD is 001)
        ("0011-650-333-6000", "SG"),
        # 2nd test: calling using StarHub IDD service (IDD is 008)
        ("0081-650-333-6000", "SG"),
        # 3rd test: calling using SingTel V019 service (IDD is 019)
        ("0191-650-333-6000", "SG"),
        # Calling the US number from Poland
        ("0~01-650-333-6000", "PL"),
        # Using "++" at the start.
        ("++1 (650) 333-6000", "PL"),
        # Using a full-width plus sign.
        (u("\uFF0B1 (650) 333-6000"), "SG"),
        # The whole number, including punctuation, is here represented in
        # full-width form.
        (u("\uFF0B\uFF11\u3000\uFF08\uFF16\uFF15\uFF10\uFF09") +
         u("\u3000\uFF13\uFF13\uFF13\uFF0D\uFF16\uFF10\uFF10\uFF10"), "SG"),
    ]
    for text, region in cases:
        self.doTestFindInContext(text, region)
def testGetDescriptionForNumberWithNoDataFile(self):
    """Check descriptions returned when a region is passed explicitly."""
    # (expected description, number, language, region), checked in order.
    cases = (
        # No data file containing mappings for US numbers is available in
        # Chinese for the unittests. As a result, the country name of United
        # States in simplified Chinese is returned.
        (u("\u7F8E\u56FD"), US_NUMBER1, _CHINESE, _CHINA),
        ("Bahamas", BS_NUMBER1, _ENGLISH, _USA),
        ("Australia", AU_NUMBER, _ENGLISH, _USA),
        ("", NUMBER_WITH_INVALID_COUNTRY_CODE, _ENGLISH, _USA),
        ("", INTERNATIONAL_TOLL_FREE, _ENGLISH, _USA),
    )
    for expected, number, language, user_region in cases:
        actual = description_for_number(number, language, region=user_region)
        self.assertEqual(expected, actual)
def testGetDescriptionForFallBack(self):
    """Check which language a description is returned in for each request."""
    # (expected description, number, requested language), checked in order.
    cases = (
        # No fallback, as the location name for the given phone number is
        # available in the requested language.
        ("Kalifornien", US_NUMBER1, _GERMAN),
        # German falls back to English.
        ("New York, NY", US_NUMBER3, _GERMAN),
        # Italian falls back to English.
        ("CA", US_NUMBER1, _ITALIAN),
        # Korean doesn't fall back to English.
        (u("\uB300\uD55C\uBBFC\uAD6D"), KO_NUMBER3, _KOREAN),
    )
    for expected, number, language in cases:
        self.assertEqual(expected, description_for_number(number, language))
def __init__(self, xtag):
    """Build a NumberFormat in self.o from the XML element xtag.

    Arguments:
    xtag -- XML element carrying a required 'pattern' attribute, a 'format'
              child and zero or more 'leadingDigits' children; if None,
              self.o is set to None and nothing else is done.

    Raises Exception if the element has no 'format' value.
    """
    if xtag is None:
        self.o = None
        return
    self.o = NumberFormat()
    self.o._mutable = True
    self.o.pattern = xtag.attrib['pattern']  # REQUIRED attribute
    self.o.format = _get_unique_child_value(xtag, 'format')
    if self.o.format is None:
        raise Exception("No format pattern found")
    # Replace '$1' etc with '\1' to match Python regexp group reference format.
    # The pattern is a raw string so that '\$' is not an invalid escape
    # sequence in the string literal.
    self.o.format = re.sub(r'\$', u(r'\\'), self.o.format)
    for xleading_digit in xtag.findall("leadingDigits"):
        self.o.leading_digits_pattern.append(_dews_re(xleading_digit.text))
def testCoverage(self):
    """Python-specific extra checks of the geocoding entry points.

    Temporarily modifies the shared TEST_GEOCODE_DATA table.  The
    modifications are undone in ``finally`` clauses so that a failing
    assertion here does not leave altered data behind.
    """
    # Python version extra tests
    invalid_number = PhoneNumber(country_code=210, national_number=123456)
    self.assertEqual("", country_name_for_number(invalid_number, "en"))
    # Ensure we exercise all public entrypoints directly
    self.assertEqual("CA", _prefix_description_for_number(TEST_GEOCODE_DATA, TEST_GEOCODE_LONGEST_PREFIX, US_NUMBER1, "en"))
    self.assertEqual("CA", description_for_valid_number(US_NUMBER1, "en"))
    self.assertEqual("", description_for_valid_number(US_INVALID_NUMBER, "en"))
    # Add in some script and region specific fictional names
    TEST_GEOCODE_DATA['1650960'] = {'en': u("Mountain View, CA"),
                                    "en_GB": u("Mountain View California"),
                                    "en_US": u("Mountain View, Sunny California"),
                                    "en_Xyzz_US": u("MTV - xyzz"),
                                    "en_Latn": u("MountainView")}
    try:
        # The following test might one day return "Mountain View California"
        self.assertEqual("United States",
                         description_for_number(US_NUMBER2, _ENGLISH, region="GB"))
        self.assertEqual("Mountain View, Sunny California",
                         description_for_number(US_NUMBER2, _ENGLISH, region="US"))
        self.assertEqual("MountainView",
                         description_for_number(US_NUMBER2, _ENGLISH, script="Latn"))
        self.assertEqual("United States",
                         description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="GB"))
        self.assertEqual("MTV - xyzz",
                         description_for_number(US_NUMBER2, _ENGLISH, script="Xyzz", region="US"))
        self.assertEqual("Mountain View, Sunny California",
                         description_for_number(US_NUMBER2, _ENGLISH, script="Zazz", region="US"))
        # Get a different result when there is a script-specific variant
        self.assertEqual("MountainView",
                         description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="US"))
    finally:
        # Put back the plain entry even if an assertion above failed.
        TEST_GEOCODE_DATA['1650960'] = {'en': u("Mountain View, CA")}

    # Test the locale mapping
    TEST_GEOCODE_DATA['8868'] = {'zh': u("Chinese"), 'zh_Hant': u("Hant-specific")}
    try:
        tw_number = FrozenPhoneNumber(country_code=886, national_number=810080123)
        self.assertEqual("Hant-specific",
                         description_for_number(tw_number, "zh", region="TW"))
    finally:
        # Remove the temporary entry even if the assertion above failed.
        del TEST_GEOCODE_DATA['8868']
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. data = { '44760': { 'en': 'British pager' }, '244917': { 'en': 'Angolan carrier' }, '4411': { 'en': 'British fixed line carrier' }, '1650213': { 'en': 'US carrier2' }, '1650212': { 'en': 'US carrier' }, '244262': { 'en': 'Angolan fixed line carrier' }, '4473': { 'en': 'British carrier', 'sv': u('Brittisk operat\u00f6r') }, }
def __init__(self, owning_xterr, xtag, national_prefix, national_prefix_formatting_rule,
             national_prefix_optional_when_formatting, carrier_code_formatting_rule):
    """Build national and international NumberFormat objects from xtag.

    On return self.o holds the national format and self.io the international
    one; self.o is appended to owning_xterr.o.number_format and self.io (when
    it has a format) to owning_xterr.o.intl_number_format.

    Arguments:
    owning_xterr -- Object whose .o metadata receives the formats; its
              has_explicit_intl_format flag is set when xtag has an
              'intlFormat' value.
    xtag -- XML element describing the format; if None, self.o and self.io
              are set to None and nothing else is done.
    national_prefix -- Passed to _expand_formatting_rule for both rules.
    national_prefix_formatting_rule -- Territory-wide rule, used when xtag has
              no 'nationalPrefixFormattingRule' attribute.
    national_prefix_optional_when_formatting -- Territory-wide value, used
              when the element's own value is falsy and this one is truthy.
    carrier_code_formatting_rule -- Territory-wide rule, used when xtag has
              no 'carrierCodeFormattingRule' attribute.

    Raises Exception if the element has no 'format' value.
    """
    if xtag is None:
        self.o = None
        self.io = None
        return

    self.o = NumberFormat()
    self.o._mutable = True
    # Find the REQUIRED attribute
    self.o.pattern = xtag.attrib['pattern']
    # Find the IMPLIED attribute(s)
    self.o.domestic_carrier_code_formatting_rule = xtag.get('carrierCodeFormattingRule', None)
    self.o.national_prefix_formatting_rule = xtag.get('nationalPrefixFormattingRule', None)
    self.o.national_prefix_optional_when_formatting = get_optional_true_attrib(xtag, 'nationalPrefixOptionalWhenFormatting')

    # Post-process formatting rules for expansions and defaults
    if self.o.national_prefix_formatting_rule is not None:
        # expand abbreviations
        self.o.national_prefix_formatting_rule = _expand_formatting_rule(self.o.national_prefix_formatting_rule,
                                                                         national_prefix)
    else:
        # set to territory-wide formatting rule
        self.o.national_prefix_formatting_rule = national_prefix_formatting_rule
    if self.o.national_prefix_formatting_rule is not None:
        # Replace '$1' etc with '\1' to match Python regexp group reference format
        # (raw-string pattern: '\$' is not a valid escape in a plain literal)
        self.o.national_prefix_formatting_rule = re.sub(r'\$', r'\\', self.o.national_prefix_formatting_rule)

    # NOTE(review): this test is also true for an explicit False value, not
    # only for None; whether that matters depends on what
    # get_optional_true_attrib returns, which is not visible here -- confirm.
    if not self.o.national_prefix_optional_when_formatting and national_prefix_optional_when_formatting:
        # If attrib is None, it was missing and inherits territory-wide value
        self.o.national_prefix_optional_when_formatting = national_prefix_optional_when_formatting

    if self.o.domestic_carrier_code_formatting_rule is not None:
        # expand abbreviations
        self.o.domestic_carrier_code_formatting_rule = _expand_formatting_rule(self.o.domestic_carrier_code_formatting_rule,
                                                                               national_prefix)
    else:
        # set to territory-wide formatting rule
        self.o.domestic_carrier_code_formatting_rule = carrier_code_formatting_rule
    if self.o.domestic_carrier_code_formatting_rule is not None:
        # Replace '$1' etc with '\1' to match Python regexp group reference format.
        # Only a '$' followed by a digit is rewritten here; any other '$' is left alone.
        self.o.domestic_carrier_code_formatting_rule = re.sub(r'\$(\d)', r'\\\1', self.o.domestic_carrier_code_formatting_rule)

    self.o.format = _get_unique_child_value(xtag, 'format')
    if self.o.format is None:
        raise Exception("No format pattern found")
    # Replace '$1' etc with '\1' to match Python regexp group reference format
    self.o.format = re.sub(r'\$', u(r'\\'), self.o.format)
    for xleading_digit in xtag.findall("leadingDigits"):
        self.o.leading_digits_pattern.append(_dews_re(xleading_digit.text))

    # Add this NumberFormat object into the owning metadata
    owning_xterr.o.number_format.append(self.o)

    # Extract the pattern for international format; if not present, use the national format.
    # If the intlFormat is set to "NA" the intlFormat should be ignored.
    self.io = NumberFormat(pattern=self.o.pattern, leading_digits_pattern=self.o.leading_digits_pattern)
    self.io._mutable = True
    intl_format = _get_unique_child_value(xtag, "intlFormat")
    if intl_format is None:
        # Default to use the same as the national pattern if none is defined.
        self.io.format = self.o.format
    else:
        # Replace '$1' etc with '\1' to match Python regexp group reference format
        intl_format = re.sub(r'\$', u(r'\\'), intl_format)
        if intl_format != DATA_NA:
            self.io.format = intl_format
        owning_xterr.has_explicit_intl_format = True
    if self.io.format is not None:
        # Add this international NumberFormat object into the owning metadata
        owning_xterr.o.intl_number_format.append(self.io)
# Copyright (C) 2011-2015 The Libphonenumber Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. data = { '8255':{'en': 'Gyeongnam', 'ko': u('\uacbd\ub0a8')}, '8232':{'en': 'Incheon', 'ko': u('\uc778\ucc9c')}, '8231':{'en': 'Gyeonggi', 'ko': u('\uacbd\uae30')}, '8251':{'en': 'Busan', 'ko': u('\ubd80\uc0b0')}, '8253':{'en': 'Daegu', 'ko': u('\ub300\uad6c')}, '8252':{'en': 'Ulsan', 'ko': u('\uc6b8\uc0b0')}, '1650960':{'en': 'Mountain View, CA'}, '1201':{'de': 'New Jersey', 'en': 'NJ'}, '8264':{'en': 'Jeju'}, '542214':{'en': 'La Plata'}, '1989':{'en': 'MA'}, '1212812':{'en': 'New York, NY'}, '1650':{'de': 'Kalifornien', 'en': 'CA'}, '1212':{'en': 'NY'}, '8254':{'en': 'Gyeongbuk', 'ko': u('\uacbd\ubd81')}, '8233':{'en': 'Gangwon', 'ko': u('\uac15\uc6d0')},
def __unicode__(self):
    """Return the result of passing the wrapped object self.o to u()."""
    wrapped = self.o
    return u(wrapped)
def __unicode__(self):
    """Return one "<country_id>: <territory>" line per self.territory entry."""
    entries = []
    for country_id, territory in self.territory.items():
        entries.append(u("%s: %s") % (country_id, territory))
    separator = u("\n")
    return separator.join(entries)
def __unicode__(self):
    """Return the result of passing self.number_format to u()."""
    number_format = self.number_format
    return u(number_format)
NumberTest("(20) 3346 1234", "GB"), # Non-optional NP omitted ] # Strings with number-like things that should only be found up to and # including the "valid" leniency level. VALID_CASES = [NumberTest("65 02 53 00 00", "US"), NumberTest("6502 538365", "US"), NumberTest("650//253-1234", "US"), # 2 slashes are illegal at higher levels NumberTest("650/253/1234", "US"), NumberTest("9002309. 158", "US"), NumberTest("12 7/8 - 14 12/34 - 5", "US"), NumberTest("12.1 - 23.71 - 23.45", "US"), NumberTest("800 234 1 111x1111", "US"), NumberTest("1979-2011 100", "US"), NumberTest("+494949-4-94", "DE"), # National number in wrong format NumberTest(u("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17"), "US"), NumberTest("2012-0102 08", "US"), # Very strange formatting. NumberTest("2012-01-02 08", "US"), # Breakdown assistance number with unexpected formatting. NumberTest("1800-1-0-10 22", "AU"), NumberTest("030-3-2 23 12 34", "DE"), NumberTest("03 0 -3 2 23 12 34", "DE"), NumberTest("(0)3 0 -3 2 23 12 34", "DE"), NumberTest("0 3 0 -3 2 23 12 34", "DE"), ] # Strings with number-like things that should only be found up to and # including the "strict_grouping" leniency level. STRICT_GROUPING_CASES = [NumberTest("(415) 6667777", "US"), NumberTest("415-6667777", "US"), # Should be found by strict grouping but not exact
def __init__(self, owning_xterr, xtag, national_prefix, national_prefix_formatting_rule,
             national_prefix_optional_when_formatting, carrier_code_formatting_rule):
    """Build national and international NumberFormat objects from xtag.

    On return self.o holds the national format and self.io the international
    one; self.o is appended to owning_xterr.o.number_format and self.io (when
    it has a format) to owning_xterr.o.intl_number_format.

    Arguments:
    owning_xterr -- Object whose .o metadata receives the formats; its
              has_explicit_intl_format flag is set when xtag has an
              'intlFormat' value.
    xtag -- XML element describing the format; if None, self.o and self.io
              are set to None and nothing else is done.
    national_prefix -- Passed to _expand_formatting_rule for both rules.
    national_prefix_formatting_rule -- Territory-wide rule, used when xtag has
              no 'nationalPrefixFormattingRule' attribute.
    national_prefix_optional_when_formatting -- Territory-wide value, used
              when the element's own value is falsy.
    carrier_code_formatting_rule -- Territory-wide rule, used when xtag has
              no 'carrierCodeFormattingRule' attribute.

    Raises Exception if the element has no 'format' value.
    """
    if xtag is None:
        self.o = None
        self.io = None
        return

    self.o = NumberFormat()
    self.o._mutable = True
    # Find the REQUIRED attribute
    self.o.pattern = xtag.attrib['pattern']
    # Find the IMPLIED attribute(s)
    self.o.domestic_carrier_code_formatting_rule = xtag.get('carrierCodeFormattingRule', None)
    self.o.national_prefix_formatting_rule = xtag.get('nationalPrefixFormattingRule', None)
    self.o.national_prefix_optional_when_formatting = get_true_attrib(xtag, 'nationalPrefixOptionalWhenFormatting')

    # Post-process formatting rules for expansions and defaults
    if self.o.national_prefix_formatting_rule is not None:
        # expand abbreviations
        self.o.national_prefix_formatting_rule = _expand_formatting_rule(self.o.national_prefix_formatting_rule,
                                                                         national_prefix)
    else:
        # set to territory-wide formatting rule
        self.o.national_prefix_formatting_rule = national_prefix_formatting_rule
    if self.o.national_prefix_formatting_rule is not None:
        # Replace '$1' etc with '\1' to match Python regexp group reference format
        # (raw-string pattern: '\$' is not a valid escape in a plain literal)
        self.o.national_prefix_formatting_rule = re.sub(r'\$', r'\\', self.o.national_prefix_formatting_rule)

    if not self.o.national_prefix_optional_when_formatting:
        # If attrib is False, it was missing and inherits territory-wide value
        self.o.national_prefix_optional_when_formatting = national_prefix_optional_when_formatting

    if self.o.domestic_carrier_code_formatting_rule is not None:
        # expand abbreviations
        self.o.domestic_carrier_code_formatting_rule = _expand_formatting_rule(self.o.domestic_carrier_code_formatting_rule,
                                                                               national_prefix)
    else:
        # set to territory-wide formatting rule
        self.o.domestic_carrier_code_formatting_rule = carrier_code_formatting_rule
    if self.o.domestic_carrier_code_formatting_rule is not None:
        # Replace '$1' etc with '\1' to match Python regexp group reference format.
        # Only a '$' followed by a digit is rewritten here; any other '$' is left alone.
        self.o.domestic_carrier_code_formatting_rule = re.sub(r'\$(\d)', r'\\\1', self.o.domestic_carrier_code_formatting_rule)

    self.o.format = _get_unique_child_value(xtag, 'format')
    if self.o.format is None:
        raise Exception("No format pattern found")
    # Replace '$1' etc with '\1' to match Python regexp group reference format
    self.o.format = re.sub(r'\$', u(r'\\'), self.o.format)
    for xleading_digit in xtag.findall("leadingDigits"):
        self.o.leading_digits_pattern.append(_dews_re(xleading_digit.text))

    # Add this NumberFormat object into the owning metadata
    owning_xterr.o.number_format.append(self.o)

    # Extract the pattern for international format; if not present, use the national format.
    # If the intlFormat is set to "NA" the intlFormat should be ignored.
    self.io = NumberFormat(pattern=self.o.pattern, leading_digits_pattern=self.o.leading_digits_pattern)
    self.io._mutable = True
    intl_format = _get_unique_child_value(xtag, "intlFormat")
    if intl_format is None:
        # Default to use the same as the national pattern if none is defined.
        self.io.format = self.o.format
    else:
        # Replace '$1' etc with '\1' to match Python regexp group reference format
        intl_format = re.sub(r'\$', u(r'\\'), intl_format)
        if intl_format != DATA_NA:
            self.io.format = intl_format
        owning_xterr.has_explicit_intl_format = True
    if self.io.format is not None:
        # Add this international NumberFormat object into the owning metadata
        owning_xterr.o.intl_number_format.append(self.io)
def testConnectsToEmergencyNumberWithPlusSign_US(self):
    """Numbers written with a leading plus sign must be rejected for "US"."""
    plus_prefixed = (
        "+911",
        u("\uFF0B911"),
        " +911",
        "+112",
        "+999",
    )
    for candidate in plus_prefixed:
        self.assertFalse(connects_to_emergency_number(candidate, "US"))
def testGetDescriptionForMobilePortableRegion(self):
    """Check carrier names for the UK mobile sample number."""
    english_name = name_for_number(UK_MOBILE1, _ENGLISH)
    self.assertEqual("British carrier", english_name)

    swedish_name = name_for_number(UK_MOBILE1, "sv", region="SE")
    self.assertEqual(u("Brittisk operat\u00F6r"), swedish_name)

    french_name = name_for_number(UK_MOBILE1, _FRENCH)
    self.assertEqual("British carrier", french_name)

    # Returns an empty string because the UK implements mobile number portability.
    display_name = safe_display_name(UK_MOBILE1, _ENGLISH)
    self.assertEqual("", display_name)
}, '1650960': { 'en': 'Mountain View, CA' }, '1989': { 'en': 'MA' }, '542214': { 'en': 'La Plata' }, '8210': { 'en': 'Mobile prefix, should not be geocoded.' }, '822': { 'en': 'Seoul', 'ko': u('\uc11c\uc6b8') }, '8231': { 'en': 'Gyeonggi', 'ko': u('\uacbd\uae30') }, '8232': { 'en': 'Incheon', 'ko': u('\uc778\ucc9c') }, '8233': { 'en': 'Gangwon', 'ko': u('\uac15\uc6d0') }, '8241': { 'en': 'Chungnam',
def testCoverage(self):
    """Python-specific extra checks of the geocoding entry points.

    Temporarily modifies the shared TEST_GEOCODE_DATA table.  The
    modifications are undone in ``finally`` clauses so that a failing
    assertion here does not leave altered data behind.
    """
    # Python version extra tests
    invalid_number = PhoneNumber(country_code=210, national_number=123456)
    self.assertEqual("", country_name_for_number(invalid_number, "en"))
    # Ensure we exercise all public entrypoints directly
    self.assertEqual(
        "CA",
        _prefix_description_for_number(TEST_GEOCODE_DATA,
                                       TEST_GEOCODE_LONGEST_PREFIX,
                                       US_NUMBER1, "en"))
    self.assertEqual("CA", description_for_valid_number(US_NUMBER1, "en"))
    self.assertEqual("", description_for_valid_number(US_INVALID_NUMBER, "en"))
    # Add in some script and region specific fictional names
    TEST_GEOCODE_DATA['1650960'] = {
        'en': u("Mountain View, CA"),
        "en_GB": u("Mountain View California"),
        "en_US": u("Mountain View, Sunny California"),
        "en_Xyzz_US": u("MTV - xyzz"),
        "en_Latn": u("MountainView")
    }
    try:
        # The following test might one day return "Mountain View California"
        self.assertEqual(
            "United States",
            description_for_number(US_NUMBER2, _ENGLISH, region="GB"))
        self.assertEqual(
            "Mountain View, Sunny California",
            description_for_number(US_NUMBER2, _ENGLISH, region="US"))
        self.assertEqual(
            "MountainView",
            description_for_number(US_NUMBER2, _ENGLISH, script="Latn"))
        self.assertEqual(
            "United States",
            description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="GB"))
        self.assertEqual(
            "MTV - xyzz",
            description_for_number(US_NUMBER2, _ENGLISH, script="Xyzz", region="US"))
        self.assertEqual(
            "Mountain View, Sunny California",
            description_for_number(US_NUMBER2, _ENGLISH, script="Zazz", region="US"))
        # Get a different result when there is a script-specific variant
        self.assertEqual(
            "MountainView",
            description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="US"))
    finally:
        # Put back the plain entry even if an assertion above failed.
        TEST_GEOCODE_DATA['1650960'] = {'en': u("Mountain View, CA")}

    # Test the locale mapping
    TEST_GEOCODE_DATA['8868'] = {
        'zh': u("Chinese"),
        'zh_Hant': u("Hant-specific")
    }
    try:
        tw_number = FrozenPhoneNumber(country_code=886, national_number=810080123)
        self.assertEqual(
            "Hant-specific",
            description_for_number(tw_number, "zh", region="TW"))
    finally:
        # Remove the temporary entry even if the assertion above failed.
        del TEST_GEOCODE_DATA['8868']