Beispiel #1
0
    def test_custom_regex(self):
        """
        A parser built from hand-written sub-regexes and a "#" separator must
        accept entries in that format, reject a comma-separated entry, and the
        default parser must reject the "#"-separated entry.
        """
        # Custom sub-regexes that deliberately do not make a lot of sense IRL.
        # Raw strings are used so that `\-` and `\d` reach the regex engine
        # untouched instead of being (invalid) Python string escapes.
        re_color = r"(?P<color>red?)"
        re_first_name = r"(?P<firstname>[a-z]+)"
        re_last_name = r"(?P<lastname>[a-z]+)"
        re_phone = r"(?P<phonenumber>[()\-\d]{6})"
        re_zipcode = r"(?P<zipcode>\d{3})"

        # Define a new format using the custom regexes
        custom_format = [
            ContactFormat(
                [re_color, re_first_name, re_last_name, re_phone, re_zipcode],
                separator="#")
        ]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # A comma-separated entry does not follow the custom "#" format
        self.assertRaises(ContactParseError, custom_parser.parse,
                          "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "red#Francis#Rougemont#123456#424"

        # Valid entry defined in the new parser rules
        self.assertIsInstance(custom_parser.parse(entry), Contact)

        # Invalid entry using the regular default parser rules
        self.assertRaises(ContactParseError, self.parser.parse, entry)
Beispiel #2
0
def rolodex(input_file_path, output_file_path):
    """
    Rolodex takes entries of personal information in multiple formats from the given input file and
    normalizes each entry into a standard JSON format
    :param input_file_path: input file to parse
    :param output_file_path: optional: save the results to the output file
    """
    parser = ContactParser()
    extracted_contacts = []
    parse_errors_indexes = []

    try:
        with open(input_file_path) as input_file:
            for index, line in enumerate(input_file):
                try:
                    # Remove line breaks at the end of the entry
                    contact = parser.parse(line.rstrip())
                    # The parser found a valid contact, we add it to the final list
                    extracted_contacts.append(contact)
                except ContactParseError, e:
                    logging.warn(e)
                    # We keep track of the line index in case of an error
                    parse_errors_indexes.append(index)
                except Exception, e:
                    logging.error(e)
Beispiel #3
0
    def test_custom_format(self):
        """
        A parser configured with the contact_format sub-regexes in a custom
        order must accept entries in that order and reject others, while the
        default parser rejects the reordered entry.
        """
        # Field order of the custom format: phone, color, zipcode, last name, first name
        field_order = [
            contact_format.RE_PHONE,
            contact_format.RE_COLOR,
            contact_format.RE_ZIPCODE,
            contact_format.RE_LAST_NAME,
            contact_format.RE_FIRST_NAME,
        ]
        reordered_parser = ContactParser(formats=[ContactFormat(field_order)])

        # This entry does not follow the custom field order
        with self.assertRaises(ContactParseError):
            reordered_parser.parse("(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "191 933 8599, pink, 10036, Maurita, Awong"

        # The custom parser accepts the reordered entry...
        parsed = reordered_parser.parse(entry)
        self.assertIsInstance(parsed, Contact)

        # ...whereas the default parser rejects it
        with self.assertRaises(ContactParseError):
            self.parser.parse(entry)
Beispiel #4
0
    def test_custom_format(self):
        """
        Build a parser whose single format lists the contact_format sub-regexes
        in a non-default order, then check what it accepts and rejects compared
        with the default parser.
        """
        # Custom ordering: phone, color, zipcode, last name, first name
        ordered_fields = [
            contact_format.RE_PHONE,
            contact_format.RE_COLOR,
            contact_format.RE_ZIPCODE,
            contact_format.RE_LAST_NAME,
            contact_format.RE_FIRST_NAME,
        ]
        only_format = ContactFormat(ordered_fields)
        custom_parser = ContactParser(formats=[only_format])

        # Entry that does not match the custom ordering
        with self.assertRaises(ContactParseError):
            custom_parser.parse(
                "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "191 933 8599, pink, 10036, Maurita, Awong"

        # Valid for the custom parser
        result = custom_parser.parse(entry)
        self.assertIsInstance(result, Contact)

        # Invalid for the default parser
        with self.assertRaises(ContactParseError):
            self.parser.parse(entry)
Beispiel #5
0
    def test_custom_regex(self):
        """
        A parser built from hand-written sub-regexes and a "#" separator must
        accept entries in that format, reject a comma-separated entry, and the
        default parser must reject the "#"-separated entry.
        """
        # Custom sub-regexes that deliberately do not make a lot of sense IRL.
        # Raw strings are used so that `\-` and `\d` reach the regex engine
        # untouched instead of being (invalid) Python string escapes.
        re_color = r"(?P<color>red?)"
        re_first_name = r"(?P<firstname>[a-z]+)"
        re_last_name = r"(?P<lastname>[a-z]+)"
        re_phone = r"(?P<phonenumber>[()\-\d]{6})"
        re_zipcode = r"(?P<zipcode>\d{3})"

        # Define a new format using the custom regexes
        custom_format = [ContactFormat([re_color, re_first_name, re_last_name, re_phone, re_zipcode], separator="#")]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # A comma-separated entry does not follow the custom "#" format
        self.assertRaises(ContactParseError, custom_parser.parse, "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "red#Francis#Rougemont#123456#424"

        # Valid entry defined in the new parser rules
        self.assertIsInstance(custom_parser.parse(entry), Contact)

        # Invalid entry using the regular default parser rules
        self.assertRaises(ContactParseError, self.parser.parse, entry)
Beispiel #6
0
class ParserTest(unittest.TestCase):
    """
    Testing the Parser functionalities and behavior.
    The default Parser must respect the 3 allowed formats and reject entries that don't match the pattern.
    The Parser also needs to be flexible in order to add/implement custom new formats.
    """

    def setUp(self):
        # Default parser (no custom formats) used by the tests below
        self.parser = ContactParser()

    def _assert_rejected(self, parser, entry):
        """Fail, naming the entry, unless parser.parse(entry) raises ContactParseError."""
        try:
            parser.parse(entry)
        except ContactParseError:
            return
        self.fail("ContactParseError not raised for entry %r" % (entry,))

    def _assert_accepted(self, parser, entry):
        """Fail, naming the entry, unless parser.parse(entry) returns a Contact."""
        self.assertIsInstance(
            parser.parse(entry), Contact,
            "entry %r was not parsed into a Contact" % (entry,))

    def test_invalid_formats(self):
        """Entries the default parser is expected to reject."""
        invalid_entries = (
            "0.358358554738",
            "13121, Gustavson, Natashia, (491)-571-5970, blue",
            "(555)-111-1111, Won, George, aqua marine, 77594",
            "red, Shanika, Rodh, (709)-353-2921, 60864",
            "Rachele Maze, 64938, 607 089 6760",
            "Rachele Maze, 607 089 6760",
            "Magaly, , 64568, 289 471 3436, blue",
            "Maurita, Awong, 16296, 191 933 8599, ",
        )
        for entry in invalid_entries:
            self._assert_rejected(self.parser, entry)

    def test_valid_formats(self):
        """Entries the default parser is expected to turn into a Contact."""
        valid_entries = (
            "Lastname, Firstname, (703)-742-0996, Blue, 10013",
            "Firstname Lastname, Red, 11237, 703 955 0373",
            "Firstname, Lastname, 10013, 646 111 0101, Green",
            "Rachele Maze, pink, 64938, 607 089 6760",
            "Booker T., Washington, 87360, 373 781 7380, yellow",
            "James Murphy, yellow, 83880, 018 154 6474",
            "Maurita, Awong, 16296, 191 933 8599, blue",
        )
        for entry in valid_entries:
            self._assert_accepted(self.parser, entry)

    def test_custom_format(self):
        """A parser using the contact_format sub-regexes in a custom order."""
        # Define a new format using default regex
        custom_format = [
            ContactFormat([
                contact_format.RE_PHONE, contact_format.RE_COLOR,
                contact_format.RE_ZIPCODE, contact_format.RE_LAST_NAME,
                contact_format.RE_FIRST_NAME
            ])
        ]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # This entry does not follow the custom field order
        self._assert_rejected(
            custom_parser, "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "191 933 8599, pink, 10036, Maurita, Awong"

        # Valid entry defined in the new parser rules
        self._assert_accepted(custom_parser, entry)

        # Invalid entry using the regular default parser rules
        self._assert_rejected(self.parser, entry)

    def test_custom_regex(self):
        """A parser using hand-written sub-regexes and a "#" separator."""
        # Custom sub-regexes that deliberately do not make a lot of sense IRL.
        # Raw strings are used so that `\-` and `\d` reach the regex engine
        # untouched instead of being (invalid) Python string escapes.
        re_color = r"(?P<color>red?)"
        re_first_name = r"(?P<firstname>[a-z]+)"
        re_last_name = r"(?P<lastname>[a-z]+)"
        re_phone = r"(?P<phonenumber>[()\-\d]{6})"
        re_zipcode = r"(?P<zipcode>\d{3})"

        # Define a new format using custom regex
        custom_format = [
            ContactFormat(
                [re_color, re_first_name, re_last_name, re_phone, re_zipcode],
                separator="#")
        ]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # A comma-separated entry does not follow the custom "#" format
        self._assert_rejected(
            custom_parser, "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "red#Francis#Rougemont#123456#424"

        # Valid entry defined in the new parser rules
        self._assert_accepted(custom_parser, entry)

        # Invalid entry using the regular default parser rules
        self._assert_rejected(self.parser, entry)
Beispiel #7
0
 def setUp(self):
     """Store a ContactParser built with no arguments on ``self.parser``."""
     self.parser = ContactParser()
Beispiel #8
0
class ParserTest(unittest.TestCase):
    """
    Testing the Parser functionalities and behavior.
    The default Parser must respect the 3 allowed formats and reject entries that don't match the pattern.
    The Parser also needs to be flexible in order to add/implement custom new formats.
    """

    def setUp(self):
        # Default parser (no custom formats) used by the tests below
        self.parser = ContactParser()

    def _assert_rejected(self, parser, entry):
        """Fail, naming the entry, unless parser.parse(entry) raises ContactParseError."""
        try:
            parser.parse(entry)
        except ContactParseError:
            return
        self.fail("ContactParseError not raised for entry %r" % (entry,))

    def _assert_accepted(self, parser, entry):
        """Fail, naming the entry, unless parser.parse(entry) returns a Contact."""
        self.assertIsInstance(
            parser.parse(entry), Contact,
            "entry %r was not parsed into a Contact" % (entry,))

    def test_invalid_formats(self):
        """Entries the default parser is expected to reject."""
        invalid_entries = (
            "0.358358554738",
            "13121, Gustavson, Natashia, (491)-571-5970, blue",
            "(555)-111-1111, Won, George, aqua marine, 77594",
            "red, Shanika, Rodh, (709)-353-2921, 60864",
            "Rachele Maze, 64938, 607 089 6760",
            "Rachele Maze, 607 089 6760",
            "Magaly, , 64568, 289 471 3436, blue",
            "Maurita, Awong, 16296, 191 933 8599, ",
        )
        for entry in invalid_entries:
            self._assert_rejected(self.parser, entry)

    def test_valid_formats(self):
        """Entries the default parser is expected to turn into a Contact."""
        valid_entries = (
            "Lastname, Firstname, (703)-742-0996, Blue, 10013",
            "Firstname Lastname, Red, 11237, 703 955 0373",
            "Firstname, Lastname, 10013, 646 111 0101, Green",
            "Rachele Maze, pink, 64938, 607 089 6760",
            "Booker T., Washington, 87360, 373 781 7380, yellow",
            "James Murphy, yellow, 83880, 018 154 6474",
            "Maurita, Awong, 16296, 191 933 8599, blue",
        )
        for entry in valid_entries:
            self._assert_accepted(self.parser, entry)

    def test_custom_format(self):
        """A parser using the contact_format sub-regexes in a custom order."""
        # Define a new format using default regex
        custom_format = [ContactFormat([contact_format.RE_PHONE,
                                        contact_format.RE_COLOR,
                                        contact_format.RE_ZIPCODE,
                                        contact_format.RE_LAST_NAME,
                                        contact_format.RE_FIRST_NAME])]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # This entry does not follow the custom field order
        self._assert_rejected(custom_parser, "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "191 933 8599, pink, 10036, Maurita, Awong"

        # Valid entry defined in the new parser rules
        self._assert_accepted(custom_parser, entry)

        # Invalid entry using the regular default parser rules
        self._assert_rejected(self.parser, entry)

    def test_custom_regex(self):
        """A parser using hand-written sub-regexes and a "#" separator."""
        # Custom sub-regexes that deliberately do not make a lot of sense IRL.
        # Raw strings are used so that `\-` and `\d` reach the regex engine
        # untouched instead of being (invalid) Python string escapes.
        re_color = r"(?P<color>red?)"
        re_first_name = r"(?P<firstname>[a-z]+)"
        re_last_name = r"(?P<lastname>[a-z]+)"
        re_phone = r"(?P<phonenumber>[()\-\d]{6})"
        re_zipcode = r"(?P<zipcode>\d{3})"

        # Define a new format using custom regex
        custom_format = [ContactFormat([re_color, re_first_name, re_last_name, re_phone, re_zipcode], separator="#")]

        # Create the custom parser
        custom_parser = ContactParser(formats=custom_format)

        # A comma-separated entry does not follow the custom "#" format
        self._assert_rejected(custom_parser, "(555)-111-1111, Won, George, aqua marine, 77594")

        entry = "red#Francis#Rougemont#123456#424"

        # Valid entry defined in the new parser rules
        self._assert_accepted(custom_parser, entry)

        # Invalid entry using the regular default parser rules
        self._assert_rejected(self.parser, entry)
Beispiel #9
0
 def setUp(self):
     """Store a ContactParser built with no arguments on ``self.parser``."""
     self.parser = ContactParser()