def prepare_landmark_datastring(self, data):
    """
    Cleans up the datastring and checks that it is acceptable

    Every match of the pattern stored in the registry under
    ``'LP_the_regex'`` is removed and surrounding whitespace is stripped.
    The cleaned text is rejected (``None`` is returned) when it is longer
    than 75 characters, when ``NameParser.basic_validation`` fails on its
    whitespace-separated words, or when it contains a character other than
    whitespace, ASCII letters, digits and ``.,-:``.

    :param data: data we want to check for being a location
    :type data: str
    :return: the cleaned up datastring, or None when it is rejected
    :rtype: str or None
    """
    cleaned = registry.get('LP_the_regex').sub('', data).strip()
    if len(cleaned) > 75:
        return None

    if not NameParser(self.config).basic_validation(cleaned.split()):
        return None

    permitted = (
        string.whitespace + string.ascii_letters + string.digits + '.,-:'
    )
    # A single character outside the permitted set rejects the whole string.
    if any(char not in permitted for char in cleaned):
        return None

    return cleaned
def is_address_name(self, suspected_name):
    """
    Tell whether NameParser yields at least one result for the text

    :param suspected_name: the text we think is a name
    :type suspected_name: str
    :return: True if parsing the text yields any result, False otherwise
    :rtype: bool
    """
    parser = NameParser(self.config)
    # any() stops at the first yielded result, so the parse output is
    # consumed no further than is needed to answer the question.
    return any(True for _ in parser.parse(suspected_name))
def prepare_landmark_datastring(self, data):
    """Cleans up and validates the datastring

    Returns the cleaned string, or None when it is longer than 75
    characters, fails NameParser.basic_validation, or contains a character
    outside whitespace, ASCII letters, digits and '.,-:'.
    """
    text = registry.get('LP_the_regex').sub('', data).strip()
    if len(text) > 75:
        return None

    validator = NameParser(self.config)
    tokens = text.split()
    if not validator.basic_validation(tokens):
        return None

    allowed = set(string.whitespace)
    allowed.update(string.ascii_letters, string.digits, '.,-:')
    # Any leftover character after removing the allowed ones is illegal.
    if set(text) - allowed:
        return None

    return text
def prepare_landmark_datastring(cls, data):
    """Cleans up and validates the datastring

    Returns the cleaned string when every check passes; otherwise None.
    The checks are: at most 75 characters, NameParser.basic_validation
    accepts the whitespace-separated words, and only whitespace, ASCII
    letters, digits and '.,-:' are present.
    """
    stripped = registry.get('LP_the_regex').sub('', data).strip()

    too_long = len(stripped) > 75
    if too_long or not NameParser.basic_validation(stripped.split()):
        return None

    legal = ''.join(
        (string.whitespace, string.ascii_letters, string.digits, '.,-:')
    )
    only_legal = all(symbol in legal for symbol in stripped)
    return stripped if only_legal else None
def setUp(self):
    """Bootstrap both parsers and build the AddressParser under test."""
    for parser_cls in (NameParser, AddressParser):
        # Every bootstrap call receives its own TestConfig instance.
        parser_cls.bootstrap(TestConfig())
    self.ap = AddressParser(TestConfig())
def setUp(self):
    """Bootstrap NameParser and build the parser instance under test."""
    bootstrap = NameParser.bootstrap
    bootstrap(TestConfig())
    # The instance gets a TestConfig separate from the bootstrap one.
    parser = NameParser(TestConfig())
    self.np = parser
class NameParserTests(unittest.TestCase):
    """Unit Testing of the NameParser"""

    # Parser under test; replaced by setUp and cleared by tearDown.
    np = None

    def setUp(self):
        """Bootstrap NameParser and create the parser under test."""
        NameParser.bootstrap(TestConfig())
        self.np = NameParser(TestConfig())

    def tearDown(self):
        """Flush the registry and drop the parser instance."""
        registry.flush()
        self.np = None

    def _yield_count(self, text):
        """Return how many results ``self.np.parse`` yields for *text*."""
        return sum(1 for _ in self.np.parse(text))

    def test_basic_validation(self):
        rejected = (
            ['foo', 'Bar', '2nd'],
            ['Foo', 'Bar', 'a123'],
            ['Foo', 'Bar', '$123'],
            ['Foo', 'Bar', '123'],
        )
        for words in rejected:
            self.assertFalse(self.np.basic_validation(words))
        self.assertTrue(self.np.basic_validation(['Foo', 'Bar', '2nd']))

    def test_is_prefix(self):
        self.assertFalse(self.np.is_prefix('foo'))
        self.assertTrue(self.np.is_prefix('Mr'))

    def test_is_suffix(self):
        self.assertFalse(self.np.is_suffix('foo'))
        for suffix in ('Sr', 'IV'):
            self.assertTrue(self.np.is_suffix(suffix))

    def test_is_initial(self):
        for token in ('Hello', '1', '1.', '1,', 'A,'):
            self.assertFalse(self.np.is_initial(token))
        for token in ('Q', 'Q.'):
            self.assertTrue(self.np.is_initial(token))

    def test_parseWithNoUpperCaseLettersYieldsNothing(self):
        self.assertEqual(self._yield_count('foo'), 0)

    def test_parseWithGreaterThanTenWordsYieldsNothing(self):
        sentence = (
            'Foo bar baz buns barf blarg bleh bler blue sner sneh snaf.'
        )
        self.assertEqual(self._yield_count(sentence), 0)

    def test_parseWithNonBasicValidatedAttributesYieldsNothing(self):
        self.assertEqual(self._yield_count('Foo bar The Third'), 0)

    def test_parseYieldsExpectedConfidenceWithFiveWordName(self):
        seen = 0
        # enumerate keeps the iteration lazy while tracking the result count.
        for seen, result in enumerate(
                self.np.parse('Dr. Foo Bar Bleh Bar Sr.'), start=1):
            self.assertEqual(result.confidence, 52)
            self.assertEqual(result.subtype, 'Name')
        self.assertEqual(seen, 1)

    def test_parseYieldsExpectedConfidenceWithThreeWordName(self):
        seen = 0
        for seen, result in enumerate(
                self.np.parse('Dr. Foo Q. Ben Sr.'), start=1):
            self.assertEqual(result.confidence, 95)
            self.assertEqual(result.subtype, 'Name')
        self.assertEqual(seen, 1)

    def test_parseYieldsNothingWithOneWordName(self):
        self.assertEqual(self._yield_count('Foo'), 0)

    def test_parseYieldsNothingWithNonPrintableCharacters(self):
        self.assertEqual(self._yield_count(u('40.244° N 79.123° W')), 0)
class NameParserTests(unittest.TestCase):
    """Unit Testing of the NameParser"""

    # Holds the NameParser instance for the currently running test.
    np = None

    def setUp(self):
        """Bootstrap NameParser, then build a parser for the test."""
        NameParser.bootstrap(TestConfig())
        self.np = NameParser(TestConfig())

    def tearDown(self):
        """Flush the registry and release the parser."""
        registry.flush()
        self.np = None

    def _assert_parse_yields_nothing(self, text):
        """Assert that ``self.np.parse`` yields no result for *text*."""
        produced = 0
        for _ in self.np.parse(text):
            produced += 1
        self.assertEqual(produced, 0)

    def _assert_single_name(self, text, confidence):
        """Assert *text* parses to exactly one 'Name' with *confidence*."""
        produced = 0
        for parsed in self.np.parse(text):
            # Each result is checked as it is yielded, before counting it.
            self.assertEqual(parsed.confidence, confidence)
            self.assertEqual(parsed.subtype, 'Name')
            produced += 1
        self.assertEqual(produced, 1)

    def test_basic_validation(self):
        validate = self.np.basic_validation
        self.assertFalse(validate(['foo', 'Bar', '2nd']))
        self.assertFalse(validate(['Foo', 'Bar', 'a123']))
        self.assertFalse(validate(['Foo', 'Bar', '$123']))
        self.assertFalse(validate(['Foo', 'Bar', '123']))
        self.assertTrue(validate(['Foo', 'Bar', '2nd']))

    def test_is_prefix(self):
        is_prefix = self.np.is_prefix
        self.assertFalse(is_prefix('foo'))
        self.assertTrue(is_prefix('Mr'))

    def test_is_suffix(self):
        is_suffix = self.np.is_suffix
        self.assertFalse(is_suffix('foo'))
        self.assertTrue(is_suffix('Sr'))
        self.assertTrue(is_suffix('IV'))

    def test_is_initial(self):
        is_initial = self.np.is_initial
        self.assertFalse(is_initial('Hello'))
        self.assertFalse(is_initial('1'))
        self.assertFalse(is_initial('1.'))
        self.assertFalse(is_initial('1,'))
        self.assertFalse(is_initial('A,'))
        self.assertTrue(is_initial('Q'))
        self.assertTrue(is_initial('Q.'))

    def test_parseWithNoUpperCaseLettersYieldsNothing(self):
        self._assert_parse_yields_nothing('foo')

    def test_parseWithGreaterThanTenWordsYieldsNothing(self):
        self._assert_parse_yields_nothing(
            'Foo bar baz buns barf blarg bleh bler blue sner sneh snaf.'
        )

    def test_parseWithNonBasicValidatedAttributesYieldsNothing(self):
        self._assert_parse_yields_nothing('Foo bar The Third')

    def test_parseYieldsExpectedConfidenceWithFiveWordName(self):
        self._assert_single_name('Dr. Foo Bar Bleh Bar Sr.', 52)

    def test_parseYieldsExpectedConfidenceWithThreeWordName(self):
        self._assert_single_name('Dr. Foo Q. Ben Sr.', 95)

    def test_parseYieldsNothingWithOneWordName(self):
        self._assert_parse_yields_nothing('Foo')

    def test_parseYieldsNothingWithNonPrintableCharacters(self):
        self._assert_parse_yields_nothing(u('40.244° N 79.123° W'))