def findMatchesInContexts(self, contexts, isValid, isPossible, region="US", number="415-666-7777"):
    """Check how a number embedded in each context is treated at two leniencies.

    For every context the test number is placed between the context's
    ``leadingText`` and ``trailingText``.

    - If isValid is True, every context must yield the number when matching
      at Leniency.VALID; otherwise no number may be extracted at that level.
    - If isPossible is True, every context must yield the number when
      matching at Leniency.POSSIBLE; otherwise no number may be extracted at
      that level.
    """
    if isValid:
        self.doTestInContext(number, region, contexts, Leniency.VALID)
    else:
        for context in contexts:
            text = context.leadingText + number + context.trailingText
            # No leniency is passed here, so the matcher's default applies.
            self.assertTrue(
                self.hasNoMatches(PhoneNumberMatcher(text, region)),
                msg="Should not have found a number in " + text)
    if isPossible:
        self.doTestInContext(number, region, contexts, Leniency.POSSIBLE)
    else:
        for context in contexts:
            text = context.leadingText + number + context.trailingText
            # sys.maxint was removed in Python 3; sys.maxsize exists on both
            # major versions and still means "effectively unlimited tries".
            matcher = PhoneNumberMatcher(text, region,
                                         leniency=Leniency.POSSIBLE,
                                         max_tries=sys.maxsize)
            self.assertTrue(self.hasNoMatches(matcher),
                            msg="Should not have found a number in " + text)
def testIsraelShortNumber(self):
    """Python-port extra test: a 4-digit Israeli input, bare and star-prefixed."""
    # The bare four digits must not be reported at POSSIBLE leniency...
    plain = PhoneNumberMatcher("1234", "IL", leniency=Leniency.POSSIBLE)
    self.assertFalse(plain.has_next())
    # ...whereas the star-prefixed form must be.
    starred = PhoneNumberMatcher("*1234", "IL", leniency=Leniency.POSSIBLE)
    self.assertTrue(starred.has_next())
def doTestInContext(self, number, defaultCountry, contextPairs, leniency):
    """Assert that ``number`` is extracted exactly from every context.

    For each context the number is wrapped in the context's ``leadingText``
    and ``trailingText``. The first match must cover exactly the inserted
    number, and iterating the matcher over every suffix of the text must
    terminate (see ``ensureTermination``).
    """
    for context in contextPairs:
        prefix = context.leadingText
        text = prefix + number + context.trailingText

        start = len(prefix)
        end = start + len(number)
        # sys.maxint no longer exists on Python 3; sys.maxsize is available on
        # both major versions and keeps the "no practical limit" meaning.
        matcher = PhoneNumberMatcher(text, defaultCountry, leniency, sys.maxsize)

        match = matcher.next() if matcher.has_next() else None
        self.assertTrue(match is not None,
                        msg="Did not find a number in '" + text + "'; expected '" + number + "'")

        extracted = text[match.start:match.end]
        self.assertEqual(start, match.start,
                         msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
        self.assertEqual(end, match.end,
                         msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
        self.assertEqual(number, extracted)
        self.assertEqual(match.raw_string, extracted)

        self.ensureTermination(text, defaultCountry, leniency)
def doTestInContext(self, number, defaultCountry, contextPairs, leniency):
    """Assert that ``number`` is found, with exact bounds, inside each context.

    Each context supplies ``leadingText`` and ``trailingText`` that are
    placed around the number. The first match reported by the matcher must
    span precisely the inserted number; afterwards ``ensureTermination`` is
    run on the same text.
    """
    for context in contextPairs:
        prefix = context.leadingText
        text = prefix + number + context.trailingText

        start = len(prefix)
        end = start + len(number)
        # sys.maxint was dropped in Python 3; sys.maxsize works everywhere.
        matcher = PhoneNumberMatcher(text, defaultCountry, leniency, sys.maxsize)

        if matcher.has_next():
            match = matcher.next()
        else:
            match = None
        self.assertTrue(match is not None,
                        msg="Did not find a number in '" + text + "'; expected '" + number + "'")

        extracted = text[match.start:match.end]
        # assertEquals is a deprecated alias that was removed in Python 3.12;
        # assertEqual is the supported spelling.
        self.assertEqual(start, match.start,
                         msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
        self.assertEqual(end, match.end,
                         msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
        self.assertEqual(number, extracted)
        self.assertEqual(match.raw_string, extracted)

        self.ensureTermination(text, defaultCountry, leniency)
def testDoubleExtensionX(self):
    """Python-port extra test: a doubled 'x' used as the extension marker."""
    text = "800 234 1 111 xx 1111"
    # The outcome depends on the leniency level, which is why this input
    # cannot be expressed as a NumberTest.
    lenient = PhoneNumberMatcher(text, "US", leniency=Leniency.POSSIBLE)
    first = lenient.next()
    self.assertEqual(text, first.raw_string)

    strict = PhoneNumberMatcher(text, "US", leniency=Leniency.STRICT_GROUPING)
    self.assertFalse(strict.has_next())
def testEmptyIteration(self):
    """An empty text has no matches and next() must raise once exhausted."""
    matcher = PhoneNumberMatcher("", "ZZ")
    # Asked twice on purpose: has_next() must not change the answer.
    self.assertFalse(matcher.has_next())
    self.assertFalse(matcher.has_next())
    try:
        matcher.next()
    except Exception:
        # Success: an exhausted matcher raised instead of returning.
        pass
    else:
        # Deliberately outside the try body: self.fail() raises
        # AssertionError, which is itself an Exception and would otherwise be
        # swallowed by the handler above, making this check unable to fail.
        self.fail("Violation of the iterator contract.")
    self.assertFalse(matcher.has_next())
def testNonPlusPrefixedNumbersNotFoundForInvalidRegion(self):
    """A number without a leading '+' is not matched for region "ZZ"."""
    # Does not start with a "+", we won't match it.
    matcher = PhoneNumberMatcher("1 456 764 156", "ZZ")
    self.assertFalse(matcher.has_next())
    try:
        matcher.next()
    except Exception:
        # Success: next() on an exhausted matcher raised.
        pass
    else:
        # Kept out of the try body so that the AssertionError raised by
        # self.fail() is not caught by "except Exception" and silently lost.
        self.fail("Violation of the Iterator contract.")
    self.assertFalse(matcher.has_next())
def testMatchesFoundWithMultipleSpaces(self):
    """Two space-containing US numbers separated by a space are both found, in order."""
    number1 = "(415) 666-7777"
    number2 = "(800) 443-1223"
    text = " ".join((number1, number2))

    matcher = PhoneNumberMatcher(text, "US")
    for expected in (number1, number2):
        # A missing match is passed on as None so assertMatchProperties reports it.
        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, text, expected, "US")
def assertEqualRange(self, text, index, start, end):
    """Asserts that another number can be found in text starting at index,
    and that its corresponding range is [start, end).

    ``start`` and ``end`` are offsets into the full ``text``; the matcher is
    run on ``text[index:]`` (region "NZ", POSSIBLE leniency), so the reported
    offsets are compared after shifting by ``index``.
    """
    sub = text[index:]
    # sys.maxint is Python 2 only; sys.maxsize exists on Python 2.6+ and 3.
    matcher = PhoneNumberMatcher(sub, "NZ", Leniency.POSSIBLE, sys.maxsize)

    self.assertTrue(matcher.has_next())
    match = matcher.next()
    self.assertEqual(start - index, match.start)
    self.assertEqual(end - index, match.end)
    self.assertEqual(sub[match.start:match.end], match.raw_string)
def testIsLatinLetter(self):
    """Check PhoneNumberMatcher._is_latin_letter on letters and non-letters."""
    # The last entry is the combining acute accent.
    latin = ('c', 'C', u'\u00C9', u'\u0301')
    for char in latin:
        self.assertTrue(PhoneNumberMatcher._is_latin_letter(char))

    # Punctuation, digits and white-space are not considered "latin letters".
    # The last entry is a Chinese character.
    not_latin = (':', '5', '-', '.', ' ', u'\u6211')
    for char in not_latin:
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(char))
def assertEqualRange(self, text, index, start, end):
    """Asserts that another number can be found in text starting at index,
    and that its corresponding range is [start, end).

    The matcher runs on the suffix ``text[index:]`` with region "NZ" and
    POSSIBLE leniency, so ``start``/``end`` (offsets into the full text) are
    shifted by ``index`` before being compared with the match.
    """
    sub = text[index:]
    # sys.maxint was removed in Python 3; sys.maxsize is the portable stand-in.
    matcher = PhoneNumberMatcher(sub, "NZ", Leniency.POSSIBLE, sys.maxsize)

    self.assertTrue(matcher.has_next())
    match = matcher.next()
    # assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(start - index, match.start)
    self.assertEqual(end - index, match.end)
    self.assertEqual(match.raw_string, sub[match.start:match.end])
def testNonMatchingBracketsAreInvalid(self):
    """Inputs with unbalanced or excessive brackets must produce no US match."""
    rejected = (
        # The digits up to the ", " form a valid US number, but it shouldn't
        # be matched as one since there was a non-matching bracket present.
        "80.585 [79.964, 81.191]",
        # The trailing "]" is thrown away before parsing, so the resultant
        # number, while a valid US number, does not have matching brackets.
        "80.585 [79.964]",
        "80.585 ((79.964)",
        # This case has too many sets of brackets to be valid.
        "(80).(585) (79).(9)64",
    )
    for text in rejected:
        matcher = PhoneNumberMatcher(text, "US")
        self.assertTrue(self.hasNoMatches(matcher))
def testMatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation(self):
    """Two US numbers separated by " -- " are reported as two distinct matches."""
    text = "Call 650-253-4561 -- 455-234-3451"
    region = "US"

    first = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                        national_number=6502534561)
    second = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                         national_number=4552343451)
    # Each expected match is built from start offset, raw text and parsed number.
    expected = (
        PhoneNumberMatch(5, "650-253-4561", first),
        PhoneNumberMatch(21, "455-234-3451", second),
    )

    matcher = PhoneNumberMatcher(text, region)
    for wanted in expected:
        self.assertEqual(wanted, matcher.next())
def testMatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation(self):
    """Two US numbers separated by " -- " are each matched on their own."""
    text = "Call 650-253-4561 -- 455-234-3451"
    region = "US"

    # The Python 2 long suffix ("6502534561L") is a SyntaxError on Python 3.
    # Plain integer literals are accepted by both versions and Python 2
    # promotes them to long automatically when needed.
    number1 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                          national_number=6502534561)
    match1 = PhoneNumberMatch(5, "650-253-4561", number1)

    number2 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                          national_number=4552343451)
    match2 = PhoneNumberMatch(21, "455-234-3451", number2)

    matches = PhoneNumberMatcher(text, region)
    self.assertEqual(match1, matches.next())
    self.assertEqual(match2, matches.next())
def testIsLatinLetter(self):
    """Exercise PhoneNumberMatcher._is_latin_letter with accepted and rejected characters."""
    is_latin_letter = PhoneNumberMatcher._is_latin_letter

    accepted = [
        'c',
        'C',
        u'\u00C9',
        u'\u0301',  # Combining acute accent
    ]
    for candidate in accepted:
        self.assertTrue(is_latin_letter(candidate))

    # Punctuation, digits and white-space are not considered "latin letters".
    rejected = [
        ':',
        '5',
        '-',
        '.',
        ' ',
        u'\u6211',  # Chinese character
    ]
    for candidate in rejected:
        self.assertFalse(is_latin_letter(candidate))
def testMatchWithSurroundingZipcodes(self):
    """A US number is found even when a 5-digit zip code sits next to it."""
    number = "415-666-7777"
    zipPreceding = "My address is CA 34215 - " + number + " is my number."
    expectedResult = phonenumberutil.parse(number, "US")

    matcher = PhoneNumberMatcher(zipPreceding, "US")
    match = matcher.next() if matcher.has_next() else None
    self.assertTrue(match is not None,
                    msg="Did not find a number in '" + zipPreceding + "'; expected " + number)
    self.assertEqual(expectedResult, match.number)
    self.assertEqual(number, match.raw_string)

    # Now repeat, but this time the phone number has spaces in it.
    # It should still be found.
    number = "(415) 666 7777"
    zipFollowing = "My number is " + number + ". 34215 is my zip-code."

    matcher = PhoneNumberMatcher(zipFollowing, "US")
    matchWithSpaces = matcher.next() if matcher.has_next() else None
    self.assertTrue(matchWithSpaces is not None,
                    msg="Did not find a number in '" + zipFollowing + "'; expected " + number)
    self.assertEqual(expectedResult, matchWithSpaces.number)
    self.assertEqual(number, matchWithSpaces.raw_string)
def ensureTermination(self, text, defaultCountry, leniency):
    """Exhaust a matcher over every suffix of ``text``, the empty suffix included.

    Nothing is asserted about the matches themselves; the point is that
    iterating to the end completes for each starting offset.
    """
    for offset in range(len(text) + 1):
        suffix = text[offset:]
        matcher = PhoneNumberMatcher(suffix, defaultCountry, leniency, 65535)
        # Iterates over all matches, rendering each one; the joined string is
        # not used afterwards.
        rendered = "".join([", " + str(found) for found in matcher])
def testNoMatchIfRegionIsNone(self):
    """Without a region, a number lacking an international prefix is not matched."""
    # Fail on non-international prefix if region code is None.
    text = "Random text body - number is 0331 6005, see you there"
    matcher = PhoneNumberMatcher(text, None)
    self.assertTrue(self.hasNoMatches(matcher))
def parse_file_for_nums(fpath, country_code):
    """Read the file at ``fpath`` and return the phone numbers found in it.

    The file contents are scanned with PhoneNumberMatcher, using
    ``country_code`` as its region argument and the matcher's default
    leniency. Every match's parsed number is passed through ``formatNum``
    and the results are returned as a list. A progress line is printed.
    """
    with open(fpath, 'r') as handle:
        contents = handle.read()
    print("Parsing", fpath, "for", "numbers. Code :", country_code)

    formatted = []
    for found in PhoneNumberMatcher(contents, country_code):
        formatted.append(formatNum(found.number))
    return formatted
def extract_contacts(line: str) -> Tuple[List[str], List[str]]:
    """Return ``(emails, phones)`` extracted from one line of text.

    Emails are every match of ``EMAIL_RE``. Phones are found by
    PhoneNumberMatcher with "US" as the region and are rendered in
    international format.
    """
    emails = EMAIL_RE.findall(line)

    # "how hard can it be to write a regex to match phone numbers?"
    # way too hard for international formats, as it turns out
    phones = []
    for found in PhoneNumberMatcher(line, "US"):
        phones.append(format_number(found.number, PhoneNumberFormat.INTERNATIONAL))

    return emails, phones
def testFourMatchesInARow(self):
    """Four US numbers joined by " - " are returned one after another, in order."""
    number1 = "415-666-7777"
    number2 = "800-443-1223"
    number3 = "212-443-1223"
    number4 = "650-443-1223"
    expected_in_order = (number1, number2, number3, number4)
    text = " - ".join(expected_in_order)

    matcher = PhoneNumberMatcher(text, "US")
    for expected in expected_in_order:
        # None stands in for a missing match so assertMatchProperties reports it.
        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, text, expected, "US")
def testSequences(self):
    """Two NZ numbers in one text are returned in order, then iteration ends."""
    # Test multiple occurrences.
    # Two spaces precede "or": the second number must begin at index 19 to
    # agree with match2 below. With a single space it would begin at 18 and
    # the equality check on the second match could not succeed.
    text = "Call 033316005  or 032316005!"
    region = "NZ"

    number1 = PhoneNumber()
    number1.country_code = phonenumberutil.country_code_for_region(region)
    number1.national_number = 33316005
    match1 = PhoneNumberMatch(5, "033316005", number1)

    number2 = PhoneNumber()
    number2.country_code = phonenumberutil.country_code_for_region(region)
    number2.national_number = 32316005
    match2 = PhoneNumberMatch(19, "032316005", number2)

    # sys.maxint was removed in Python 3; sys.maxsize exists on both versions.
    matcher = PhoneNumberMatcher(text, region, Leniency.POSSIBLE, sys.maxsize)

    # assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(match1, matcher.next())
    self.assertEqual(match2, matcher.next())
    self.assertFalse(matcher.has_next())
def testSequences(self):
    """Two NZ numbers in one text are returned in order, then iteration ends."""
    # Test multiple occurrences.
    # Two spaces precede "or" so that the second number starts at index 19,
    # the offset asserted through match2. With only one space the number
    # starts at 18 and the comparison with match2 could never succeed.
    text = "Call 033316005  or 032316005!"
    region = "NZ"

    number1 = PhoneNumber()
    number1.country_code = phonenumberutil.country_code_for_region(region)
    number1.national_number = 33316005
    match1 = PhoneNumberMatch(5, "033316005", number1)

    number2 = PhoneNumber()
    number2.country_code = phonenumberutil.country_code_for_region(region)
    number2.national_number = 32316005
    match2 = PhoneNumberMatch(19, "032316005", number2)

    matcher = PhoneNumberMatcher(text, region, Leniency.POSSIBLE, 65535)

    self.assertEqual(match1, matcher.next())
    self.assertEqual(match2, matcher.next())
    self.assertFalse(matcher.has_next())
def testMaxMatchesMixed(self):
    """With max tries of 10, only the first 10 numbers of the mixed text are matched."""
    # Set up text with 100 valid numbers inside an invalid number.
    repeated = "My info: 415-666-7777 123 fake street"
    numbers = repeated * 100

    # Only matches the first 10 despite there being 100 numbers due to max matches.
    number = phonenumberutil.parse("+14156667777", None)
    expected = [number] * 10

    actual = []
    for found in PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10):
        actual.append(found.number)
    self.assertEqual(expected, actual)
def testMaxMatches(self):
    """All 100 valid numbers are matched even though max tries is only 10."""
    # Set up text with 100 valid phone numbers.
    repeated = "My info: 415-666-7777,"
    numbers = repeated * 100

    # Matches all 100. Max only applies to failed cases.
    number = phonenumberutil.parse("+14156667777", None)
    expected = [number] * 100

    actual = []
    for found in PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10):
        actual.append(found.number)
    self.assertEqual(expected, actual)
def testMatchWithSurroundingZipcodes(self):
    """A US number is still found when a 5-digit zip code appears beside it."""
    number = "415-666-7777"
    zipPreceding = "My address is CA 34215. " + number + " is my number."
    expectedResult = phonenumberutil.parse(number, "US")

    matcher = PhoneNumberMatcher(zipPreceding, "US")
    if matcher.has_next():
        match = matcher.next()
    else:
        match = None
    self.assertTrue(match is not None,
                    msg="Did not find a number in '" + zipPreceding + "'; expected " + number)
    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported name.
    self.assertEqual(expectedResult, match.number)
    self.assertEqual(number, match.raw_string)

    # Now repeat, but this time the phone number has spaces in it.
    # It should still be found.
    number = "(415) 666 7777"

    zipFollowing = "My number is " + number + ". 34215 is my zip-code."
    matcher = PhoneNumberMatcher(zipFollowing, "US")
    if matcher.has_next():
        matchWithSpaces = matcher.next()
    else:
        matchWithSpaces = None
    self.assertTrue(matchWithSpaces is not None,
                    msg="Did not find a number in '" + zipFollowing + "'; expected " + number)
    self.assertEqual(expectedResult, matchWithSpaces.number)
    self.assertEqual(number, matchWithSpaces.raw_string)
def _phone_numbers(query, region=None): if not isinstance(query, basestring): raise StopIteration found = False for guess in (region, "US", "CN", "RU"): try: for match in PhoneNumberMatcher(query, region=guess): yield format_number(match.number, 1) found = True except: pass if found: break
def testMatchWithSurroundingZipcodes(self):
    """A US number is found whether a 5-digit zip code precedes or follows it."""
    number = "415-666-7777"
    zipPreceding = "My address is CA 34215 - " + number + " is my number."

    matcher = PhoneNumberMatcher(zipPreceding, "US")
    if matcher.has_next():
        match = matcher.next()
    else:
        match = None
    self.assertMatchProperties(match, zipPreceding, number, "US")

    # Now repeat, but this time the phone number has spaces in it.
    # It should still be found.
    number = "(415) 666 7777"
    zipFollowing = "My number is " + number + ". 34215 is my zip-code."

    matcher = PhoneNumberMatcher(zipFollowing, "US")
    if matcher.has_next():
        match = matcher.next()
    else:
        match = None
    self.assertMatchProperties(match, zipFollowing, number, "US")
def testDoubleIteration(self):
    """has_next() must not advance the matcher; next() must raise when exhausted."""
    matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ")

    # With hasNext() -> next().
    # Double hasNext() to ensure it does not advance.
    self.assertTrue(matcher.has_next())
    self.assertTrue(matcher.has_next())
    self.assertTrue(matcher.next() is not None)
    self.assertTrue(matcher.has_next())
    self.assertTrue(matcher.has_next())
    self.assertTrue(matcher.next() is not None)

    self.assertFalse(matcher.has_next())
    try:
        matcher.next()
    except Exception:
        # Success
        pass
    else:
        # Placed in the else branch: self.fail() raises AssertionError, an
        # Exception subclass, which the handler above would otherwise swallow.
        self.fail("Violation of the Matcher contract.")
    self.assertFalse(matcher.has_next())

    # With next() only.
    matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ")
    self.assertTrue(matcher.next() is not None)
    self.assertTrue(matcher.next() is not None)
    try:
        matcher.next()
    except Exception:
        # Success
        pass
    else:
        # Same reasoning as above: keep the failure out of the try body.
        self.fail("Violation of the Matcher contract.")
def testDoesNotMatchMultiplePhoneNumbersSeparatedWithNoWhiteSpace(self):
    """Two numbers glued together by "--" must not produce any match."""
    # No white-space found between numbers - neither is found.
    region = "US"
    text = "Call 650-253-4561--455-234-3451"

    matcher = PhoneNumberMatcher(text, region)
    self.assertTrue(self.hasNoMatches(matcher))
def testMaxMatchesInvalid(self):
    """With max tries of 10, ten leading invalid numbers leave nothing to match."""
    # Set up text with 10 invalid phone numbers followed by 100 valid.
    invalid_part = "My address 949-8945-0" * 10
    valid_part = "My info: 415-666-7777," * 100
    numbers = invalid_part + valid_part

    matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
    self.assertFalse(matcher.has_next())
def testNoMatchInEmptyString(self):
    """Neither an empty string nor a blank one contains a match."""
    for text in ("", " "):
        matcher = PhoneNumberMatcher(text, "US")
        self.assertTrue(self.hasNoMatches(matcher))
def testNoMatchIfNoNumber(self):
    """Text that contains no digits at all yields no match."""
    text = "Random text body - number is foobar, see you there"
    matcher = PhoneNumberMatcher(text, "US")
    self.assertTrue(self.hasNoMatches(matcher))
def testNoneInput(self):
    """A None text yields no matches, with or without a region."""
    for region in ("US", None):
        matcher = PhoneNumberMatcher(None, region)
        self.assertTrue(self.hasNoMatches(matcher))
def findNumbersForLeniency(self, text, defaultCountry, leniency):
    """Return a PhoneNumberMatcher over ``text`` at the given leniency.

    The matcher is created with an effectively unlimited number of tries.
    """
    # sys.maxint does not exist on Python 3; sys.maxsize is available on
    # Python 2.6+ and 3 and serves the same "no practical limit" purpose.
    return PhoneNumberMatcher(text, defaultCountry, leniency, sys.maxsize)
import phonenumbers
from phonenumbers import PhoneNumberMatcher, geocoder

# Parse a number embedded in free text; no region is supplied.
x = phonenumbers.parse("Hello my number is +65-9121 8077", None)
print(x)

# Parse a number written in national form, with "GB" as the region.
y = phonenumbers.parse("020 8366 1177", "GB")
print(y)

text = "Call me at 510-748-8230 if it's before 9:30, or on 703-4800500 after 10am."
numbers = []
for match in PhoneNumberMatcher(text, "US"):
    parsed = match.number
    print(parsed)
    # The "+1" prefix is hard-coded; it is not read from the parsed number.
    national = str(parsed.national_number)
    numbers.append("+1" + national)
print(numbers)

# Look up the English description for the first parsed number.
geolocation = geocoder.description_for_number(x, "en")
print(geolocation)