Ejemplo n.º 1
0
    def prepare_landmark_datastring(self, data):
        """
        Strip the ``LP_the_regex`` registry pattern from ``data`` and
        validate what is left.

        The cleaned text is rejected when it is longer than 75 characters,
        when ``NameParser.basic_validation`` refuses its whitespace-separated
        words, or when it contains a character other than whitespace, ASCII
        letters, digits or one of ``.,-:``.

        :param data: data we want to check for being a location
        :type data: str
        :return: the cleaned up datastring, or None when it is rejected
        :rtype: str or None
        """
        cleaned = registry.get('LP_the_regex').sub('', data).strip()
        if len(cleaned) > 75:
            return None

        validator = NameParser(self.config)
        if not validator.basic_validation(cleaned.split()):
            return None

        permitted = ''.join(
            (string.whitespace, string.ascii_letters, string.digits, '.,-:'))
        # A single character outside the permitted set rejects the string.
        if any(char not in permitted for char in cleaned):
            return None

        return cleaned
Ejemplo n.º 2
0
    def is_address_name(self, suspected_name):
        """
        Check whether the start of an address bears a name.

        :param suspected_name: the text we think is a name
        :type suspected_name: str
        :return: True if the name parser produces at least one result for
            the text, False otherwise
        :rtype: bool
        """
        matches = NameParser(self.config).parse(suspected_name)
        # Only the existence of a first result matters, not its content.
        return any(True for _ in matches)
Ejemplo n.º 3
0
    def is_address_name(self, suspected_name):
        """
        Tell whether the name parser recognises ``suspected_name``.

        :param suspected_name: the text we think is a name
        :type suspected_name: str
        :return: True when parsing the text produces at least one result,
            False when it produces none
        :rtype: bool
        """
        parser = NameParser(self.config)
        # Sentinel distinguishes "no result" from any value a result may have.
        nothing = object()
        first = next(iter(parser.parse(suspected_name)), nothing)
        return first is not nothing
Ejemplo n.º 4
0
    def prepare_landmark_datastring(self, data):
        """
        Clean up and validate the datastring.

        Every match of the registry pattern ``LP_the_regex`` is removed and
        the result is stripped. The candidate is then checked, in order,
        for length (at most 75 characters), for passing
        ``NameParser.basic_validation`` on its split words, and for
        consisting only of whitespace, ASCII letters, digits and ``.,-:``.

        :param data: the raw text to clean and validate
        :return: the cleaned up datastring, or None if any check fails
        """
        candidate = registry.get('LP_the_regex').sub('', data).strip()

        too_long = len(candidate) > 75
        if too_long:
            return None

        validator = NameParser(self.config)
        words_ok = validator.basic_validation(candidate.split())
        if not words_ok:
            return None

        whitelist = string.whitespace + string.ascii_letters
        whitelist += string.digits + '.,-:'
        for symbol in candidate:
            if symbol not in whitelist:
                return None

        return candidate
Ejemplo n.º 5
0
    def prepare_landmark_datastring(cls, data):
        """
        Clean up and validate the datastring.

        Removes matches of the registry pattern ``LP_the_regex``, strips the
        result and returns it only if it is at most 75 characters long,
        passes ``NameParser.basic_validation`` on its split words, and is
        made up solely of whitespace, ASCII letters, digits and ``.,-:``.

        :param data: the raw text to clean and validate
        :return: the cleaned up datastring, or None when it is rejected
        """
        stripped = registry.get('LP_the_regex').sub('', data).strip()
        if len(stripped) > 75:
            return None

        if not NameParser.basic_validation(stripped.split()):
            return None

        accepted = string.whitespace + string.ascii_letters + string.digits
        accepted += '.,-:'
        has_foreign_char = any(c not in accepted for c in stripped)
        return None if has_foreign_char else stripped
Ejemplo n.º 6
0
 def setUp(self):
     """Bootstrap both parsers and create the AddressParser under test.

     Each call receives its own freshly constructed TestConfig.
     """
     name_bootstrap_config = TestConfig()
     NameParser.bootstrap(name_bootstrap_config)

     address_bootstrap_config = TestConfig()
     AddressParser.bootstrap(address_bootstrap_config)

     parser_config = TestConfig()
     self.ap = AddressParser(parser_config)
Ejemplo n.º 7
0
 def setUp(self):
     """Bootstrap NameParser and create the instance under test.

     The bootstrap call and the instance each get their own TestConfig.
     """
     bootstrap_config = TestConfig()
     NameParser.bootstrap(bootstrap_config)

     parser_config = TestConfig()
     self.np = NameParser(parser_config)
Ejemplo n.º 8
0
class NameParserTests(unittest.TestCase):
    """Unit tests for the NameParser."""

    # Parser under test; rebuilt in setUp and cleared in tearDown.
    np = None

    def setUp(self):
        """Bootstrap NameParser and build a fresh parser for each test."""
        NameParser.bootstrap(TestConfig())
        self.np = NameParser(TestConfig())

    def tearDown(self):
        """Flush the registry and drop the parser reference."""
        registry.flush()
        self.np = None

    def _count_results(self, text):
        """Return how many results the parser yields for ``text``."""
        return sum(1 for _ in self.np.parse(text))

    def _assert_single_name(self, text, confidence):
        """Assert ``text`` parses to exactly one 'Name' result.

        Every yielded result must carry the given confidence and the
        subtype 'Name'; the total number of results must be one.
        """
        seen = 0
        for seen, result in enumerate(self.np.parse(text), 1):
            self.assertEqual(result.confidence, confidence)
            self.assertEqual(result.subtype, 'Name')
        self.assertEqual(seen, 1)

    def test_basic_validation(self):
        """basic_validation rejects the listed word lists, accepts the last."""
        rejected = (
            ['foo', 'Bar', '2nd'],
            ['Foo', 'Bar', 'a123'],
            ['Foo', 'Bar', '$123'],
            ['Foo', 'Bar', '123'],
        )
        for words in rejected:
            self.assertFalse(self.np.basic_validation(words))

        self.assertTrue(self.np.basic_validation(['Foo', 'Bar', '2nd']))

    def test_is_prefix(self):
        """is_prefix is false for 'foo' and true for 'Mr'."""
        self.assertFalse(self.np.is_prefix('foo'))
        self.assertTrue(self.np.is_prefix('Mr'))

    def test_is_suffix(self):
        """is_suffix is false for 'foo' and true for 'Sr' and 'IV'."""
        self.assertFalse(self.np.is_suffix('foo'))
        for suffix in ('Sr', 'IV'):
            self.assertTrue(self.np.is_suffix(suffix))

    def test_is_initial(self):
        """is_initial accepts 'Q' and 'Q.' and rejects the other samples."""
        for token in ('Hello', '1', '1.', '1,', 'A,'):
            self.assertFalse(self.np.is_initial(token))
        for token in ('Q', 'Q.'):
            self.assertTrue(self.np.is_initial(token))

    def test_parseWithNoUpperCaseLettersYieldsNothing(self):
        """An all-lowercase word produces no results."""
        self.assertEqual(self._count_results('foo'), 0)

    def test_parseWithGreaterThanTenWordsYieldsNothing(self):
        """A sentence of more than ten words produces no results."""
        sentence = \
            'Foo bar baz buns barf blarg bleh bler blue sner sneh snaf.'
        self.assertEqual(self._count_results(sentence), 0)

    def test_parseWithNonBasicValidatedAttributesYieldsNothing(self):
        """Text expected to fail basic validation produces no results."""
        self.assertEqual(self._count_results('Foo bar The Third'), 0)

    def test_parseYieldsExpectedConfidenceWithFiveWordName(self):
        """The sample name yields one 'Name' result with confidence 52."""
        self._assert_single_name('Dr. Foo Bar Bleh Bar Sr.', 52)

    def test_parseYieldsExpectedConfidenceWithThreeWordName(self):
        """The sample name yields one 'Name' result with confidence 95."""
        self._assert_single_name('Dr. Foo Q. Ben Sr.', 95)

    def test_parseYieldsNothingWithOneWordName(self):
        """A single capitalised word produces no results."""
        self.assertEqual(self._count_results('Foo'), 0)

    def test_parseYieldsNothingWithNonPrintableCharacters(self):
        """A coordinate string containing degree signs produces no results."""
        self.assertEqual(
            self._count_results(u('40.244° N 79.123° W')), 0)
Ejemplo n.º 9
0
 def setUp(self):
     """Prepare a NameParser for the test about to run.

     NameParser is bootstrapped with one TestConfig, then an instance is
     built from a second, separately constructed TestConfig.
     """
     config_for_bootstrap = TestConfig()
     NameParser.bootstrap(config_for_bootstrap)

     config_for_instance = TestConfig()
     self.np = NameParser(config_for_instance)
Ejemplo n.º 10
0
class NameParserTests(unittest.TestCase):
    """Unit testing of the NameParser."""

    # Holds the parser instance between setUp and tearDown.
    np = None

    def setUp(self):
        """Bootstrap NameParser, then create the parser used by the tests."""
        NameParser.bootstrap(TestConfig())
        self.np = NameParser(TestConfig())

    def tearDown(self):
        """Flush the registry and clear the parser reference."""
        registry.flush()
        self.np = None

    def _parsed(self, text):
        """Return every result the parser yields for ``text`` as a list."""
        return list(self.np.parse(text))

    def _check_name_results(self, text, expected_confidence):
        """Check each result of ``text`` and that exactly one was yielded."""
        checked = []
        for result in self.np.parse(text):
            self.assertEqual(result.confidence, expected_confidence)
            self.assertEqual(result.subtype, 'Name')
            checked.append(result)
        self.assertEqual(len(checked), 1)

    def test_basic_validation(self):
        """Exercise basic_validation on rejected and accepted word lists."""
        validate = self.np.basic_validation

        self.assertFalse(validate(['foo', 'Bar', '2nd']))
        self.assertFalse(validate(['Foo', 'Bar', 'a123']))
        self.assertFalse(validate(['Foo', 'Bar', '$123']))
        self.assertFalse(validate(['Foo', 'Bar', '123']))

        self.assertTrue(validate(['Foo', 'Bar', '2nd']))

    def test_is_prefix(self):
        """'foo' is not a prefix; 'Mr' is."""
        is_prefix = self.np.is_prefix
        self.assertFalse(is_prefix('foo'))
        self.assertTrue(is_prefix('Mr'))

    def test_is_suffix(self):
        """'foo' is not a suffix; 'Sr' and 'IV' are."""
        is_suffix = self.np.is_suffix
        self.assertFalse(is_suffix('foo'))
        self.assertTrue(is_suffix('Sr'))
        self.assertTrue(is_suffix('IV'))

    def test_is_initial(self):
        """Only 'Q' and 'Q.' among the samples count as initials."""
        is_initial = self.np.is_initial

        self.assertFalse(is_initial('Hello'))
        self.assertFalse(is_initial('1'))
        self.assertFalse(is_initial('1.'))
        self.assertFalse(is_initial('1,'))
        self.assertFalse(is_initial('A,'))

        self.assertTrue(is_initial('Q'))
        self.assertTrue(is_initial('Q.'))

    def test_parseWithNoUpperCaseLettersYieldsNothing(self):
        """Parsing an all-lowercase word yields nothing."""
        self.assertEqual(len(self._parsed('foo')), 0)

    def test_parseWithGreaterThanTenWordsYieldsNothing(self):
        """Parsing a sentence of more than ten words yields nothing."""
        long_text = \
            'Foo bar baz buns barf blarg bleh bler blue sner sneh snaf.'
        self.assertEqual(len(self._parsed(long_text)), 0)

    def test_parseWithNonBasicValidatedAttributesYieldsNothing(self):
        """Parsing text expected to fail basic validation yields nothing."""
        self.assertEqual(len(self._parsed('Foo bar The Third')), 0)

    def test_parseYieldsExpectedConfidenceWithFiveWordName(self):
        """The sample name parses to a single 'Name' with confidence 52."""
        self._check_name_results('Dr. Foo Bar Bleh Bar Sr.', 52)

    def test_parseYieldsExpectedConfidenceWithThreeWordName(self):
        """The sample name parses to a single 'Name' with confidence 95."""
        self._check_name_results('Dr. Foo Q. Ben Sr.', 95)

    def test_parseYieldsNothingWithOneWordName(self):
        """Parsing a single capitalised word yields nothing."""
        self.assertEqual(len(self._parsed('Foo')), 0)

    def test_parseYieldsNothingWithNonPrintableCharacters(self):
        """Parsing a coordinate string with degree signs yields nothing."""
        coordinates = u('40.244° N 79.123° W')
        self.assertEqual(len(self._parsed(coordinates)), 0)