def _test_fails_on_broken_cid_from_text(self, cid_text, anticipated_error_message_pattern=None):
    """
    Assert that creating a CID from ``cid_text`` raises
    ``errors.InterfaceError``.

    If ``anticipated_error_message_pattern`` is not ``None``, the text of the
    raised error must additionally match it as an ``fnmatch`` pattern.
    """
    assert cid_text is not None
    try:
        interface.create_cid_from_string(cid_text)
    except errors.InterfaceError as anticipated_error:
        if anticipated_error_message_pattern is None:
            # Any InterfaceError is good enough.
            return
        actual_message = six.text_type(anticipated_error)
        if fnmatch.fnmatch(actual_message, anticipated_error_message_pattern):
            return
        self.fail(
            'anticipated error message must match %r but is %r'
            % (anticipated_error_message_pattern, actual_message))
    else:
        # No exception at all means the broken CID was accepted.
        self.fail('InterfaceError must be raised')
def _test_fails_on_broken_cid_from_text(
        self, cid_text, anticipated_error_message_pattern=None):
    """
    Assert that creating a CID from ``cid_text`` raises
    ``errors.InterfaceError``.

    If ``anticipated_error_message_pattern`` is not ``None``, the text of the
    raised error must additionally match it as an ``fnmatch`` pattern.
    """
    assert cid_text is not None
    try:
        interface.create_cid_from_string(cid_text)
    except errors.InterfaceError as anticipated_error:
        if anticipated_error_message_pattern is None:
            # Any InterfaceError is good enough.
            return
        actual_message = six.text_type(anticipated_error)
        if fnmatch.fnmatch(actual_message, anticipated_error_message_pattern):
            return
        self.fail(
            'anticipated error message must match %r but is %r'
            % (anticipated_error_message_pattern, actual_message))
    else:
        # No exception at all means the broken CID was accepted.
        self.fail('InterfaceError must be raised')
def setUp(self):
    """
    Build one delimited and one fixed CID and store them on the test case as
    ``_standard_delimited_cid`` and ``_standard_fixed_cid``.
    """
    delimited_cid_lines = (
        'd,format,delimited',
        ' ,name ,,empty,length,type,rule',
        'f,surname',
        'f,height ,, , ,Integer',
        'f,born_on,, , ,DateTime,YYYY-MM-DD',
    )
    self._standard_delimited_cid = interface.create_cid_from_string(
        '\n'.join(delimited_cid_lines))

    fixed_cid_lines = (
        'd,format,fixed',
        ' ,name ,,empty,length,type,rule',
        'f,surname,, ,10',
        'f,height ,, , 3 ,Integer',
        'f,born_on,, ,10 ,DateTime,YYYY-MM-DD',
    )
    # FIXME: Properly skip blanks when parsing "length" in CID.
    # Until then all blanks are stripped from the fixed CID text.
    blank_free_fixed_cid_text = '\n'.join(fixed_cid_lines).replace(' ', '')
    self._standard_fixed_cid = interface.create_cid_from_string(
        blank_free_fixed_cid_text)
def setUp(self):
    """
    Build one delimited and one fixed CID and store them on the test case as
    ``_standard_delimited_cid`` and ``_standard_fixed_cid``.
    """
    delimited_cid_lines = (
        'd,format,delimited',
        ' ,name ,,empty,length,type,rule',
        'f,surname',
        'f,height ,, , ,Integer',
        'f,born_on,, , ,DateTime,YYYY-MM-DD',
    )
    delimited_cid_text = '\n'.join(delimited_cid_lines)
    self._standard_delimited_cid = interface.create_cid_from_string(delimited_cid_text)

    fixed_cid_lines = (
        'd,format,fixed',
        ' ,name ,,empty,length,type,rule',
        'f,surname,, ,10',
        'f,height ,, , 3 ,Integer',
        'f,born_on,, ,10 ,DateTime,YYYY-MM-DD',
    )
    # FIXME: Properly skip blanks when parsing "length" in CID.
    # Until then all blanks are stripped from the fixed CID text.
    blank_free_fixed_cid_text = '\n'.join(fixed_cid_lines).replace(' ', '')
    self._standard_fixed_cid = interface.create_cid_from_string(blank_free_fixed_cid_text)
def test_can_skip_header(self):
    """
    Reading data with a CID that declares one header row must yield only the
    data rows.

    The expectation excludes the header row ``['some_number']``, which is
    what the name of this test demands and what the other header tests for
    ``Reader.rows()`` with ``d,header,1`` expect as well.
    """
    cid_text = '\n'.join([
        'd,format,delimited',
        'd,header,1',
        'f,some_number,,,,Integer',
    ])
    cid = interface.create_cid_from_string(cid_text)
    with io.StringIO('some_number\n1\n2\n3') as data:
        with validio.Reader(cid, data) as reader:
            rows = list(reader.rows())
            # The first line of the data is the header and must be skipped.
            self.assertEqual([['1'], ['2'], ['3']], rows)
def test_can_skip_header(self):
    """
    Reading data with a CID that declares one header row yields only the
    three data rows.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,1',
        'f,some_number,,,,Integer',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    expected_rows = [['1'], ['2'], ['3']]
    with io.StringIO('some_number\n1\n2\n3') as data_stream, \
            validio.Reader(cid, data_stream) as reader:
        actual_rows = list(reader.rows())
        self.assertEqual(expected_rows, actual_rows)
def test_can_create_cid_from_text(self):
    """
    A CID created from CSV text reports the data format declared in it.
    """
    cid_lines = (
        ',Example CID as CSV from a string',
        'D,Format,%s' % data.FORMAT_DELIMITED,
        ' ,Name ,,,Length,Type ,Rule',
        'F,name ,,,...50',
        'F,height ,,, ,Decimal',
        'F,date_of_birth,,, ,DateTime,YYYY-MM-DD',
    )
    cid_from_text = interface.create_cid_from_string('\n'.join(cid_lines))
    actual_format = cid_from_text.data_format.format
    self.assertEqual(data.FORMAT_DELIMITED, actual_format)
def test_fails_on_error_after_header(self):
    """
    After two rows have been written for a CID declaring ``header,2``,
    writing the non-integer value ``'abc'`` raises
    ``errors.FieldValueError``.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,2',
        ' ,name ,,empty,length,type,rule',
        'f,height ,, , ,Integer',
    )
    cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
    header_rows = (
        ['some', 'header', 'columns'],
        ['height'],
    )
    with io.StringIO() as delimited_stream, \
            validio.Writer(cid_with_header, delimited_stream) as delimited_writer:
        for header_row in header_rows:
            delimited_writer.write_row(header_row)
        broken_row = ['abc']
        self.assertRaises(errors.FieldValueError, delimited_writer.write_row, broken_row)
def test_can_write_delimited_header(self):
    """
    For a CID declaring ``header,2``, two rows followed by a valid integer
    row can be written without error.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,2',
        ' ,name ,,empty,length,type,rule',
        'f,height ,, , ,Integer',
    )
    cid_with_header = interface.create_cid_from_string('\n'.join(cid_lines))
    rows_to_write = (
        ['some', 'header', 'columns'],
        ['height'],
        ['173'],
    )
    with io.StringIO() as delimited_stream, \
            validio.Writer(cid_with_header, delimited_stream) as delimited_writer:
        for row_to_write in rows_to_write:
            delimited_writer.write_row(row_to_write)
def test_can_continue_after_errors(self):
    """
    A reader created with ``'continue'`` yields only the rows ``['1']`` and
    ``['3']`` from data that also contains the non-integer row ``abc``.
    """
    cid_lines = (
        'd,format,delimited',
        'd,encoding,ascii',
        'f,some_number,,,,Integer',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('1\nabc\n3') as partially_broken_data, \
            validio.Reader(cid, partially_broken_data, 'continue') as reader:
        rows = list(reader.rows())
        expected_row_count = 2
        actual_row_count = len(rows)
        count_message = 'expected %d rows but got: %s' % (expected_row_count, rows)
        self.assertEqual(expected_row_count, actual_row_count, count_message)
        self.assertEqual([['1'], ['3']], rows)
def test_can_access_field_information(self):
    """
    A CID gives access to its field names, the index of a field, the value
    of a field within a row and the format of a field.
    """
    cid_lines = (
        ',Example CID as CSV from a string',
        'D,Format,%s' % data.FORMAT_DELIMITED,
        ' ,Name ,,,Length,Type ,Rule',
        'F,name ,,,...50',
        'F,height ,,, ,Decimal',
        'F,date_of_birth,,, ,DateTime,YYYY-MM-DD',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    self.assertEqual(['name', 'height', 'date_of_birth'], cid.field_names)
    self.assertEqual(1, cid.field_index('height'))
    sample_row = ['hugo', '173', '1963-02-05']
    self.assertEqual('173', cid.field_value_for('height', sample_row))
    height_format = cid.field_format_for('height')
    self.assertEqual('height', height_format.field_name)
def test_can_yield_errors(self):
    """
    A reader created with ``'yield'`` delivers three items for data with a
    broken middle row: the row ``['1']``, an ``errors.FieldValueError`` and
    the row ``['3']``.
    """
    cid_lines = (
        'd,format,delimited',
        'd,encoding,ascii',
        'f,some_number,,,,Integer',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('1\nabc\n3') as partially_broken_data, \
            validio.Reader(cid, partially_broken_data, 'yield') as reader:
        rows = list(reader.rows())
        self.assertEqual(3, len(rows), 'expected 3 rows but got: %s' % rows)
        # The length has just been checked, so unpacking is safe.
        first_item, broken_item, last_item = rows
        self.assertEqual(['1'], first_item)
        self.assertEqual(errors.FieldValueError, type(broken_item), 'rows=%s' % rows)
        self.assertEqual(['3'], last_item)
def test_fails_on_error_in_first_non_header_row(self):
    """
    A non-integer value in the row right after the single header row raises
    ``errors.FieldValueError`` with a message that refers to location R2C1.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,1',
        'f,some_number,,,,Integer',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('some_number\nabc\n') as broken_data, \
            validio.Reader(cid, broken_data) as reader:
        try:
            list(reader.rows())
        except errors.FieldValueError as anticipated_error:
            actual_message = str(anticipated_error)
            dev_test.assert_fnmatches(
                self, actual_message,
                "* (R2C1): cannot accept field 'some_number': value must be an integer number: 'abc'")
        else:
            # Reading the broken data went through without any error.
            self.fail()
def test_can_access_field_information(self):
    """
    A CID gives access to its field names, the index of a field, the value
    of a field within a row and the format of a field.
    """
    cid_lines = (
        ',Example CID as CSV from a string',
        'D,Format,%s' % data.FORMAT_DELIMITED,
        ' ,Name ,,,Length,Type ,Rule',
        'F,name ,,,...50',
        'F,height ,,, ,Decimal',
        'F,date_of_birth,,, ,DateTime,YYYY-MM-DD',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    self.assertEqual(['name', 'height', 'date_of_birth'], cid.field_names)
    self.assertEqual(1, cid.field_index('height'))
    sample_row = ['hugo', '173', '1963-02-05']
    height_value = cid.field_value_for('height', sample_row)
    self.assertEqual('173', height_value)
    height_format = cid.field_format_for('height')
    self.assertEqual('height', height_format.field_name)
def test_extra_columns_raise_error_when_validating_header(self):
    """
    With header validation enabled, data whose rows have the two columns
    ``name,age`` raise ``errors.DataError`` when read with a CID that
    declares only the field ``age``.
    """
    cid_lines = (
        'd,format,csv',
        'd,header,1',
        'd,validate header row against field names,true',
        'f,age,',
    )
    data_lines = (
        'name,age',
        'me,52',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('\n'.join(data_lines)) as data_stream, \
            validio.Reader(cid, data_stream) as reader:
        with self.assertRaises(errors.DataError):
            list(reader.rows())
def test_quoting_is_enabled_by_default(self):
    """
    Without any quote settings in the CID, double quotes around a value are
    removed when reading while a single quote inside a value is kept.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,1',
        'f,name',
    )
    data_lines = (
        'name',
        '"First Last"',
        'First d\'Last',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('\n'.join(data_lines)) as data_stream, \
            validio.Reader(cid, data_stream) as reader:
        rows = list(reader.rows())
    expected_rows = [['First Last'], ['First d\'Last']]
    self.assertEqual(expected_rows, rows)
def test_that_strict_field_names_can_be_disabled(self):
    """
    With ``strict field names`` set to ``false``, a CID whose field name
    contains blanks and punctuation can be created and used for reading.
    """
    cid_lines = (
        'd,format,delimited',
        'd,header,1',
        'd,strict field names,false',
        'f, ~!@#$%^&*()_ Name',
    )
    data_lines = (
        'name',
        'First Last',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('\n'.join(data_lines)) as data_stream, \
            validio.Reader(cid, data_stream) as reader:
        rows = list(reader.rows())
    expected_rows = [['First Last']]
    self.assertEqual(expected_rows, rows)
def test_can_process_escape_character(self):
    """
    Regression test for #49: Fails when last char of field is escaped.

    Validates one quoted value that starts with an escaped quote and one
    that ends with an escaped quote.
    """
    cid_lines = (
        'd,format,delimited',
        'd,line delimiter,lf',
        'd,encoding,ascii',
        'd,quote character,""""',
        'd,escape character,"\\"',
        'f,some_fields',
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    data_texts = (
        '"\\"x"\n',  # escape character at the start of the value
        '"x\\""\n',  # escape character at the end of the value
    )
    for data_text in data_texts:
        with io.StringIO(data_text) as data_with_escape_character:
            with validio.Reader(cid, data_with_escape_character) as reader:
                reader.validate_rows()
def test_only_first_header_row_can_be_validated(self):
    """
    Enabling header validation together with ``header,2`` raises
    ``errors.InterfaceError`` with a message that explains how to resolve
    the conflict.
    """
    cid_lines = (
        'd,format,csv',
        'd,header,2',
        'd,validate header row against field names,true',
        'f,age,',
    )
    data_lines = (
        'age',
        '52',
    )
    expected_message = (
        "Cannot validate the header row, when 'Header' is set to '2'. "
        "Either set 'Header' to '1' or disable header validation with "
        "'Validate header row against field names' set to 'False'."
    )
    cid = interface.create_cid_from_string('\n'.join(cid_lines))
    with io.StringIO('\n'.join(data_lines)) as data_stream, \
            validio.Reader(cid, data_stream) as reader:
        with self.assertRaises(errors.InterfaceError) as e:
            list(reader.rows())
        # Check the message after the assertRaises block; inside it, the
        # statement would never be reached once the error has been raised.
        self.assertEqual(expected_message, str(e.exception))
from cutplace import interface
from cutplace import errors
from cutplace import validio
from tests import dev_test

_TEST_ENCODING = "cp1252"

_DIGIT_CID_TEXT = '\n'.join((
    'd,format,delimited',
    'd,encoding,ascii',
    'f,digit,,,1,Integer',
))

#: CID for delimited data where each row has one column that must hold a
#: single digit.
_DIGIT_CID = interface.create_cid_from_string(_DIGIT_CID_TEXT)


class ReaderTest(unittest.TestCase):
    """
    Tests for opening and validating data with ``validio.Reader``.
    """
    def test_can_open_and_validate_csv(self):
        """The customers CSV file validates against the customers ODS CID."""
        customers_cid = interface.Cid(dev_test.CID_CUSTOMERS_ODS_PATH)
        customers_csv_path = dev_test.CUSTOMERS_CSV_PATH
        with validio.Reader(customers_cid, customers_csv_path) as reader:
            reader.validate_rows()

    def test_can_open_and_validate_excel(self):
        """The customers Excel file validates against its Excel CID."""
        cid_path = dev_test.path_to_test_cid("cid_customers_excel.xls")
        customers_cid = interface.Cid(cid_path)
        customers_xls_path = dev_test.path_to_test_data("valid_customers.xls")
        with validio.Reader(customers_cid, customers_xls_path) as reader:
            reader.validate_rows()
from cutplace import interface
from cutplace import errors
from cutplace import validio
from tests import dev_test

_TEST_ENCODING = "cp1252"

_DIGIT_CID_TEXT = '\n'.join((
    'd,format,delimited',
    'd,encoding,ascii',
    'f,digit,,,1,Integer',
))

#: CID for delimited data where each row has one column that must hold a
#: single digit.
_DIGIT_CID = interface.create_cid_from_string(_DIGIT_CID_TEXT)


class ReaderTest(unittest.TestCase):
    """
    Tests for opening and validating data with ``validio.Reader``.
    """
    def test_can_open_and_validate_csv_source_file(self):
        """The customers CSV file validates against the customers CID."""
        cid_path = dev_test.path_to_test_cid("icd_customers.xls")
        customers_cid = interface.Cid(cid_path)
        customers_csv_path = dev_test.path_to_test_data("valid_customers.csv")
        with validio.Reader(customers_cid, customers_csv_path) as reader:
            reader.validate_rows()

    def test_can_open_and_validate_excel_source_file(self):
        """The customers Excel file validates against its Excel CID."""
        cid_path = dev_test.path_to_test_cid("icd_customers_excel.xls")
        customers_cid = interface.Cid(cid_path)
        customers_xls_path = dev_test.path_to_test_data("valid_customers.xls")
        with validio.Reader(customers_cid, customers_xls_path) as reader:
            reader.validate_rows()