예제 #1
0
 def _test_fails_on_broken_cid_from_text(self, cid_text, anticipated_error_message_pattern=None):
     """
     Assert that building a CID from ``cid_text`` raises
     ``errors.InterfaceError``.

     In case ``anticipated_error_message_pattern`` is not ``None``, the
     text of the raised error must additionally match it according to
     ``fnmatch.fnmatch()``.
     """
     assert cid_text is not None
     try:
         interface.create_cid_from_string(cid_text)
     except errors.InterfaceError as interface_error:
         if anticipated_error_message_pattern is None:
             # Any ``InterfaceError`` is good enough.
             return
         actual_message = six.text_type(interface_error)
         if fnmatch.fnmatch(actual_message, anticipated_error_message_pattern):
             return
         self.fail(
             'anticipated error message must match %r but is %r'
             % (anticipated_error_message_pattern, actual_message)
         )
     else:
         self.fail('InterfaceError must be raised')
예제 #2
0
 def _test_fails_on_broken_cid_from_text(
         self, cid_text, anticipated_error_message_pattern=None):
     """
     Assert that building a CID from ``cid_text`` raises
     ``errors.InterfaceError``.

     In case ``anticipated_error_message_pattern`` is not ``None``, the
     text of the raised error must additionally match it according to
     ``fnmatch.fnmatch()``.
     """
     assert cid_text is not None
     pattern = anticipated_error_message_pattern
     try:
         interface.create_cid_from_string(cid_text)
     except errors.InterfaceError as interface_error:
         if pattern is None:
             # Any ``InterfaceError`` is good enough.
             return
         actual_message = six.text_type(interface_error)
         if fnmatch.fnmatch(actual_message, pattern):
             return
         self.fail(
             'anticipated error message must match %r but is %r' %
             (pattern, actual_message))
     else:
         self.fail('InterfaceError must be raised')
예제 #3
0
    def setUp(self):
        """
        Build one CID for delimited data and one for fixed data and store
        them in ``_standard_delimited_cid`` and ``_standard_fixed_cid``.
        """
        delimited_cid_lines = (
            'd,format,delimited',
            ' ,name   ,,empty,length,type,rule',
            'f,surname',
            'f,height ,,     ,      ,Integer',
            'f,born_on,,     ,      ,DateTime,YYYY-MM-DD',
        )
        self._standard_delimited_cid = interface.create_cid_from_string(
            '\n'.join(delimited_cid_lines))

        fixed_cid_lines = (
            'd,format,fixed',
            ' ,name   ,,empty,length,type,rule',
            'f,surname,,     ,10',
            'f,height ,,     , 3    ,Integer',
            'f,born_on,,     ,10    ,DateTime,YYYY-MM-DD',
        )
        # FIXME: Properly skip blanks when parsing "length" in CID.
        blank_free_fixed_cid_text = '\n'.join(fixed_cid_lines).replace(' ', '')
        self._standard_fixed_cid = interface.create_cid_from_string(
            blank_free_fixed_cid_text)
예제 #4
0
    def setUp(self):
        """
        Build one CID for delimited data and one for fixed data and store
        them in ``_standard_delimited_cid`` and ``_standard_fixed_cid``.
        """
        delimited_cid_lines = (
            'd,format,delimited',
            ' ,name   ,,empty,length,type,rule',
            'f,surname',
            'f,height ,,     ,      ,Integer',
            'f,born_on,,     ,      ,DateTime,YYYY-MM-DD',
        )
        delimited_cid = interface.create_cid_from_string('\n'.join(delimited_cid_lines))
        self._standard_delimited_cid = delimited_cid

        fixed_cid_lines = (
            'd,format,fixed',
            ' ,name   ,,empty,length,type,rule',
            'f,surname,,     ,10',
            'f,height ,,     , 3    ,Integer',
            'f,born_on,,     ,10    ,DateTime,YYYY-MM-DD',
        )
        # FIXME: Properly skip blanks when parsing "length" in CID.
        blank_free_fixed_cid_text = '\n'.join(fixed_cid_lines).replace(' ', '')
        self._standard_fixed_cid = interface.create_cid_from_string(blank_free_fixed_cid_text)
예제 #5
0
 def test_can_skip_header(self):
     """
     Ensure that the header row declared by the CID (``d,header,1``) is
     skipped, so ``Reader.rows()`` yields only the data rows.
     """
     cid_text = '\n'.join([
         'd,format,delimited',
         'd,header,1',
         'f,some_number,,,,Integer',
     ])
     cid = interface.create_cid_from_string(cid_text)
     with io.StringIO('some_number\n1\n2\n3') as data:
         with validio.Reader(cid, data) as reader:
             rows = list(reader.rows())
     # The first line ('some_number') is the header and must not show up
     # in the result; only the 3 data rows remain.
     self.assertEqual([['1'], ['2'], ['3']], rows)
예제 #6
0
 def test_can_skip_header(self):
     """
     Ensure that the header row declared by the CID (``d,header,1``) is
     not part of the rows yielded by the reader.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,1',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('some_number\n1\n2\n3') as data_stream, \
             validio.Reader(cid, data_stream) as reader:
         actual_rows = list(reader.rows())
     expected_rows = [['1'], ['2'], ['3']]
     self.assertEqual(expected_rows, actual_rows)
예제 #7
0
 def test_can_create_cid_from_text(self):
     """
     Ensure that a CID created from CSV text reports the data format
     declared in its ``Format`` row.
     """
     expected_format = data.FORMAT_DELIMITED
     format_row = 'D,Format,%s' % expected_format
     cid_rows = (
         ',Example CID as CSV from a string',
         format_row,
         ' ,Name         ,,,Length,Type    ,Rule',
         'F,name         ,,,...50',
         'F,height       ,,,      ,Decimal',
         'F,date_of_birth,,,      ,DateTime,YYYY-MM-DD',
     )
     cid_from_text = interface.create_cid_from_string('\n'.join(cid_rows))
     actual_format = cid_from_text.data_format.format
     self.assertEqual(expected_format, actual_format)
예제 #8
0
 def test_can_create_cid_from_text(self):
     """
     Ensure that a CID created from CSV text reports the data format
     declared in its ``Format`` row.
     """
     expected_format = data.FORMAT_DELIMITED
     format_row = 'D,Format,%s' % expected_format
     cid_rows = (
         ',Example CID as CSV from a string',
         format_row,
         ' ,Name         ,,,Length,Type    ,Rule',
         'F,name         ,,,...50',
         'F,height       ,,,      ,Decimal',
         'F,date_of_birth,,,      ,DateTime,YYYY-MM-DD',
     )
     cid_from_text = interface.create_cid_from_string('\n'.join(cid_rows))
     actual_format = cid_from_text.data_format.format
     self.assertEqual(expected_format, actual_format)
예제 #9
0
 def test_fails_on_error_after_header(self):
     """
     Ensure that, once the 2 header rows declared by the CID have been
     written, writing a non integer value for the Integer field
     ``height`` raises ``errors.FieldValueError``.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,2',
         ' ,name   ,,empty,length,type,rule',
         'f,height ,,     ,      ,Integer',
     )
     cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
     header_rows = (
         ['some', 'header', 'columns'],
         ['height'],
     )
     with io.StringIO() as target_stream, \
             validio.Writer(cid_with_header, target_stream) as writer:
         for header_row in header_rows:
             writer.write_row(header_row)
         with self.assertRaises(errors.FieldValueError):
             writer.write_row(['abc'])
예제 #10
0
 def test_can_write_delimited_header(self):
     """
     Ensure that 2 header rows followed by a row with an integer
     ``height`` can be written without raising an error.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,2',
         ' ,name   ,,empty,length,type,rule',
         'f,height ,,     ,      ,Integer',
     )
     cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
     rows_to_write = (
         ['some', 'header', 'columns'],
         ['height'],
         ['173'],
     )
     with io.StringIO() as target_stream, \
             validio.Writer(cid_with_header, target_stream) as writer:
         for row_to_write in rows_to_write:
             writer.write_row(row_to_write)
예제 #11
0
 def test_can_continue_after_errors(self):
     """
     Ensure that a reader created with ``'continue'`` leaves out the
     broken row (``abc``) and still yields the rows before and after it.
     """
     cid_lines = (
         'd,format,delimited',
         'd,encoding,ascii',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('1\nabc\n3') as broken_stream, \
             validio.Reader(cid, broken_stream, 'continue') as reader:
         rows = list(reader.rows())
     expected_row_count = 2
     row_count_message = 'expected %d rows but got: %s' % (expected_row_count, rows)
     self.assertEqual(expected_row_count, len(rows), row_count_message)
     expected_rows = [['1'], ['3']]
     self.assertEqual(expected_rows, rows)
예제 #12
0
 def test_fails_on_error_after_header(self):
     """
     Ensure that, once the 2 header rows declared by the CID have been
     written, writing a non integer value for the Integer field
     ``height`` raises ``errors.FieldValueError``.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,2',
         ' ,name   ,,empty,length,type,rule',
         'f,height ,,     ,      ,Integer',
     )
     cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
     header_rows = (
         ['some', 'header', 'columns'],
         ['height'],
     )
     with io.StringIO() as target_stream, \
             validio.Writer(cid_with_header, target_stream) as writer:
         for header_row in header_rows:
             writer.write_row(header_row)
         with self.assertRaises(errors.FieldValueError):
             writer.write_row(['abc'])
예제 #13
0
 def test_can_continue_after_errors(self):
     """
     Ensure that a reader created with ``'continue'`` leaves out the
     broken row (``abc``) and still yields the rows before and after it.
     """
     cid_lines = (
         'd,format,delimited',
         'd,encoding,ascii',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('1\nabc\n3') as broken_stream, \
             validio.Reader(cid, broken_stream, 'continue') as reader:
         rows = list(reader.rows())
     expected_row_count = 2
     row_count_message = 'expected %d rows but got: %s' % (expected_row_count, rows)
     self.assertEqual(expected_row_count, len(rows), row_count_message)
     expected_rows = [['1'], ['3']]
     self.assertEqual(expected_rows, rows)
예제 #14
0
 def test_can_write_delimited_header(self):
     """
     Ensure that 2 header rows followed by a row with an integer
     ``height`` can be written without raising an error.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,2',
         ' ,name   ,,empty,length,type,rule',
         'f,height ,,     ,      ,Integer',
     )
     cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
     rows_to_write = (
         ['some', 'header', 'columns'],
         ['height'],
         ['173'],
     )
     with io.StringIO() as target_stream, \
             validio.Writer(cid_with_header, target_stream) as writer:
         for row_to_write in rows_to_write:
             writer.write_row(row_to_write)
예제 #15
0
 def test_can_access_field_information(self):
     """
     Ensure that a CID exposes the names of its fields, the index of a
     field, the value of a field within a row and the format of a field.
     """
     format_row = 'D,Format,%s' % data.FORMAT_DELIMITED
     cid_rows = (
         ',Example CID as CSV from a string',
         format_row,
         ' ,Name         ,,,Length,Type    ,Rule',
         'F,name         ,,,...50',
         'F,height       ,,,      ,Decimal',
         'F,date_of_birth,,,      ,DateTime,YYYY-MM-DD',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_rows))
     sample_row = ['hugo', '173', '1963-02-05']
     self.assertEqual(['name', 'height', 'date_of_birth'], cid.field_names)
     self.assertEqual(1, cid.field_index('height'))
     self.assertEqual('173', cid.field_value_for('height', sample_row))
     height_format = cid.field_format_for('height')
     self.assertEqual('height', height_format.field_name)
예제 #16
0
 def test_can_yield_errors(self):
     """
     Ensure that a reader created with ``'yield'`` yields a
     ``errors.FieldValueError`` in place of the broken row (``abc``) and
     the valid rows unchanged.
     """
     cid_lines = (
         'd,format,delimited',
         'd,encoding,ascii',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('1\nabc\n3') as broken_stream, \
             validio.Reader(cid, broken_stream, 'yield') as reader:
         rows = list(reader.rows())
     self.assertEqual(3, len(rows), 'expected 3 rows but got: %s' % rows)
     # Unpacking is safe now that exactly 3 rows are known to exist.
     first_row, yielded_error, last_row = rows
     self.assertEqual(['1'], first_row)
     self.assertEqual(errors.FieldValueError, type(yielded_error), 'rows=%s' % rows)
     self.assertEqual(['3'], last_row)
예제 #17
0
 def test_can_yield_errors(self):
     """
     Ensure that a reader created with ``'yield'`` yields a
     ``errors.FieldValueError`` in place of the broken row (``abc``) and
     the valid rows unchanged.
     """
     cid_lines = (
         'd,format,delimited',
         'd,encoding,ascii',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('1\nabc\n3') as broken_stream, \
             validio.Reader(cid, broken_stream, 'yield') as reader:
         rows = list(reader.rows())
     self.assertEqual(3, len(rows), 'expected 3 rows but got: %s' % rows)
     # Unpacking is safe now that exactly 3 rows are known to exist.
     first_row, yielded_error, last_row = rows
     self.assertEqual(['1'], first_row)
     self.assertEqual(errors.FieldValueError, type(yielded_error), 'rows=%s' % rows)
     self.assertEqual(['3'], last_row)
예제 #18
0
 def test_fails_on_error_in_first_non_header_row(self):
     """
     Ensure that a broken value in the row right after the single header
     row raises ``errors.FieldValueError`` and that the error message
     points to row 2, column 1.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,1',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     expected_message_pattern = \
         "* (R2C1): cannot accept field 'some_number': value must be an integer number: 'abc'"
     with io.StringIO('some_number\nabc\n') as broken_stream, \
             validio.Reader(cid, broken_stream) as reader:
         try:
             list(reader.rows())
         except errors.FieldValueError as field_value_error:
             dev_test.assert_fnmatches(self, str(field_value_error), expected_message_pattern)
         else:
             # Reading all rows must not succeed.
             self.fail()
예제 #19
0
 def test_fails_on_error_in_first_non_header_row(self):
     """
     Ensure that a broken value in the row right after the single header
     row raises ``errors.FieldValueError`` and that the error message
     points to row 2, column 1.
     """
     cid_lines = (
         'd,format,delimited',
         'd,header,1',
         'f,some_number,,,,Integer',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     expected_message_pattern = \
         "* (R2C1): cannot accept field 'some_number': value must be an integer number: 'abc'"
     with io.StringIO('some_number\nabc\n') as broken_stream, \
             validio.Reader(cid, broken_stream) as reader:
         try:
             list(reader.rows())
         except errors.FieldValueError as field_value_error:
             dev_test.assert_fnmatches(self, str(field_value_error), expected_message_pattern)
         else:
             # Reading all rows must not succeed.
             self.fail()
예제 #20
0
 def test_can_access_field_information(self):
     """
     Ensure that a CID exposes the names of its fields, the index of a
     field, the value of a field within a row and the format of a field.
     """
     format_row = 'D,Format,%s' % data.FORMAT_DELIMITED
     cid_rows = (
         ',Example CID as CSV from a string',
         format_row,
         ' ,Name         ,,,Length,Type    ,Rule',
         'F,name         ,,,...50',
         'F,height       ,,,      ,Decimal',
         'F,date_of_birth,,,      ,DateTime,YYYY-MM-DD',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_rows))
     sample_row = ['hugo', '173', '1963-02-05']
     self.assertEqual(['name', 'height', 'date_of_birth'], cid.field_names)
     self.assertEqual(1, cid.field_index('height'))
     self.assertEqual('173', cid.field_value_for('height', sample_row))
     height_format = cid.field_format_for('height')
     self.assertEqual('height', height_format.field_name)
예제 #21
0
 def test_extra_columns_raise_error_when_validating_header(self):
     """
     Ensure that, with header validation enabled, data whose header row
     contains a column (``name``) that is not a field of the CID raise
     ``errors.DataError`` when the rows are read.
     """
     cid_lines = (
         'd,format,csv',
         'd,header,1',
         'd,validate header row against field names,true',
         'f,age,',
     )
     data_lines = (
         'name,age',
         'me,52',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('\n'.join(data_lines)) as data_stream, \
             validio.Reader(cid, data_stream) as reader:
         with self.assertRaises(errors.DataError):
             list(reader.rows())
예제 #22
0
    def test_quoting_is_enabled_by_default(self):
        """
        Ensure that a value enclosed in double quotes is read without the
        quotes although the CID does not declare a quote character, and
        that a single quote within a value is preserved.
        """
        cid_lines = (
            'd,format,delimited',
            'd,header,1',
            'f,name',
        )
        data_lines = (
            'name',
            '"First Last"',
            'First d\'Last',
        )
        expected_rows = [['First Last'], ['First d\'Last']]

        cid = interface.create_cid_from_string('\n'.join(cid_lines))
        with io.StringIO('\n'.join(data_lines)) as data_stream, \
                validio.Reader(cid, data_stream) as reader:
            actual_rows = list(reader.rows())
        self.assertEqual(expected_rows, actual_rows)
예제 #23
0
    def test_that_strict_field_names_can_be_disabled(self):
        """
        Ensure that with ``strict field names`` set to ``false`` a CID
        with a field name made of blanks, punctuation and letters can be
        created and used to read data.
        """
        cid_lines = (
            'd,format,delimited',
            'd,header,1',
            'd,strict field names,false',
            'f, ~!@#$%^&*()_ Name',
        )
        data_lines = (
            'name',
            'First Last',
        )
        expected_rows = [['First Last']]

        cid = interface.create_cid_from_string('\n'.join(cid_lines))
        with io.StringIO('\n'.join(data_lines)) as data_stream, \
                validio.Reader(cid, data_stream) as reader:
            actual_rows = list(reader.rows())
        self.assertEqual(expected_rows, actual_rows)
예제 #24
0
 def test_can_process_escape_character(self):
     """
     Regression test for #49: Fails when last char of field is escaped.

     Validates one quoted field that starts with an escaped quote and
     one that ends with an escaped quote.
     """
     cid_lines = (
         'd,format,delimited',
         'd,line delimiter,lf',
         'd,encoding,ascii',
         'd,quote character,""""',
         'd,escape character,"\\"',
         'f,some_fields',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     data_texts = (
         '"\\"x"\n',  # escaped quote at the start of the field
         '"x\\""\n',  # escaped quote at the end of the field
     )
     for data_text in data_texts:
         with io.StringIO(data_text) as data_stream, \
                 validio.Reader(cid, data_stream) as reader:
             reader.validate_rows()
예제 #25
0
 def test_can_process_escape_character(self):
     """
     Regression test for #49: Fails when last char of field is escaped.

     Validates one quoted field that starts with an escaped quote and
     one that ends with an escaped quote.
     """
     cid_lines = (
         'd,format,delimited',
         'd,line delimiter,lf',
         'd,encoding,ascii',
         'd,quote character,""""',
         'd,escape character,"\\"',
         'f,some_fields',
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     data_texts = (
         '"\\"x"\n',  # escaped quote at the start of the field
         '"x\\""\n',  # escaped quote at the end of the field
     )
     for data_text in data_texts:
         with io.StringIO(data_text) as data_stream, \
                 validio.Reader(cid, data_stream) as reader:
             reader.validate_rows()
예제 #26
0
 def test_only_first_header_row_can_be_validated(self):
     """
     Ensure that enabling header validation together with a header of 2
     rows raises ``errors.InterfaceError`` with an explanatory message
     when the rows are read.
     """
     cid_lines = (
         'd,format,csv',
         'd,header,2',
         'd,validate header row against field names,true',
         'f,age,',
     )
     data_lines = (
         'age',
         '52',
     )
     expected_message = (
         "Cannot validate the header row, when 'Header' is set to '2'. "
         "Either set 'Header' to '1' or disable header validation with "
         "'Validate header row against field names' set to 'False'."
     )
     cid = interface.create_cid_from_string('\n'.join(cid_lines))
     with io.StringIO('\n'.join(data_lines)) as data_stream, \
             validio.Reader(cid, data_stream) as reader:
         with self.assertRaises(errors.InterfaceError) as raised:
             list(reader.rows())
     self.assertEqual(expected_message, str(raised.exception))
예제 #27
0
from cutplace import interface
from cutplace import errors
from cutplace import validio
from tests import dev_test

_TEST_ENCODING = "cp1252"

#: Rows of the CID for delimited ASCII data with the single Integer field
#  "digit" of length 1, joined by line feeds.
_DIGIT_CID_TEXT = '\n'.join((
    'd,format,delimited',
    'd,encoding,ascii',
    'f,digit,,,1,Integer',
))
#: A CID for delimited data with 1 column per row that has to be a single
#  digit.
_DIGIT_CID = interface.create_cid_from_string(_DIGIT_CID_TEXT)


class ReaderTest(unittest.TestCase):
    """
    Tests for data formats.
    """
    def test_can_open_and_validate_csv(self):
        cid = interface.Cid(dev_test.CID_CUSTOMERS_ODS_PATH)
        with validio.Reader(cid, dev_test.CUSTOMERS_CSV_PATH) as reader:
            reader.validate_rows()

    def test_can_open_and_validate_excel(self):
        cid = interface.Cid(dev_test.path_to_test_cid("cid_customers_excel.xls"))
        with validio.Reader(cid, dev_test.path_to_test_data("valid_customers.xls")) as reader:
            reader.validate_rows()
예제 #28
0
from cutplace import interface
from cutplace import errors
from cutplace import validio
from tests import dev_test

_TEST_ENCODING = "cp1252"

#: Rows of the CID for delimited ASCII data with the single Integer field
#  "digit" of length 1, joined by line feeds.
_DIGIT_CID_TEXT = '\n'.join((
    'd,format,delimited',
    'd,encoding,ascii',
    'f,digit,,,1,Integer',
))
#: A CID for delimited data with 1 column per row that has to be a single
#  digit.
_DIGIT_CID = interface.create_cid_from_string(_DIGIT_CID_TEXT)


class ReaderTest(unittest.TestCase):
    """
    Tests for data formats.
    """
    def test_can_open_and_validate_csv_source_file(self):
        cid = interface.Cid(dev_test.path_to_test_cid("icd_customers.xls"))
        with validio.Reader(cid, dev_test.path_to_test_data("valid_customers.csv")) as reader:
            reader.validate_rows()

    def test_can_open_and_validate_excel_source_file(self):
        cid = interface.Cid(dev_test.path_to_test_cid("icd_customers_excel.xls"))
        with validio.Reader(cid, dev_test.path_to_test_data("valid_customers.xls")) as reader:
            reader.validate_rows()