def test_can_be_rendered_as_str(self):
    # The text rendition of a CID has to mention the class name, the
    # delimited data format and the name of the first field.
    cid = interface.Cid(dev_test.path_to_test_cid("customers.xls"))
    rendered = str(cid)
    self.assertTrue('Cid' in rendered)
    self.assertTrue(data.FORMAT_DELIMITED in rendered)
    first_field_name = cid.field_names[0]
    self.assertTrue(first_field_name in rendered)
def test_can_read_fields_from_excel(self):
    # Read the CID from an Excel file and check name and format of the
    # first five fields.
    cid = interface.Cid()
    cid_path = dev_test.path_to_test_cid('cid_customers.xls')
    cid.read(cid_path, rowio.excel_rows(cid_path))
    names = cid.field_names
    formats = cid.field_formats

    self.assertEqual(names[0], 'customer_id')
    self.assertTrue(isinstance(formats[0], fields.IntegerFieldFormat))

    self.assertEqual(names[1], 'surname')
    surname_format = formats[1]
    self.assertTrue(isinstance(surname_format, fields.TextFieldFormat))
    self.assertEqual(
        surname_format.length.items, ranges.Range('...60').items)

    self.assertEqual(names[2], 'first_name')
    first_name_format = formats[2]
    self.assertTrue(isinstance(first_name_format, fields.TextFieldFormat))
    self.assertEqual(
        first_name_format.length.items, ranges.Range('...60').items)
    self.assertTrue(first_name_format.is_allowed_to_be_empty)

    self.assertEqual(names[3], 'date_of_birth')
    self.assertTrue(isinstance(formats[3], fields.DateTimeFieldFormat))

    self.assertEqual(names[4], 'gender')
    gender_format = formats[4]
    self.assertTrue(isinstance(gender_format, fields.ChoiceFieldFormat))
    self.assertTrue(gender_format.is_allowed_to_be_empty)
def test_fails_on_non_existent_data(self):
    # Processing a data file that does not exist has to raise
    # EnvironmentError on Python 2 and IOError otherwise.
    expected_error_class = EnvironmentError if six.PY2 else IOError
    cid_path = dev_test.path_to_test_cid('customers.xls')
    arguments = [
        'test_fails_on_non_existent_data', cid_path, 'no_such_data.csv']
    self.assertRaises(
        expected_error_class, applications.process, arguments)
def test_fails_on_invalid_csv_source_file_with_duplicates(self):
    # Validating the rows of the CSV file with duplicates has to raise
    # a CheckError.
    customers_cid = interface.Cid(
        dev_test.path_to_test_cid("icd_customers.xls"))
    broken_data_path = dev_test.path_to_test_data(
        "broken_customers_with_duplicates.csv")
    with validio.Reader(customers_cid, broken_data_path) as rows_reader:
        self.assertRaises(errors.CheckError, rows_reader.validate_rows)
def test_fails_on_csv_source_file_with_fewer_elements_than_expected(self):
    # Validating the rows of the CSV file with fewer elements has to
    # raise a DataError.
    customers_cid = interface.Cid(
        dev_test.path_to_test_cid("icd_customers.xls"))
    broken_data_path = dev_test.path_to_test_data(
        "broken_customers_fewer_elements.csv")
    with validio.Reader(customers_cid, broken_data_path) as rows_reader:
        self.assertRaises(errors.DataError, rows_reader.validate_rows)
def test_can_open_and_validate_excel_source_file(self):
    # Validate an Excel data file against the CID for Excel data; the
    # test passes if validate_rows() raises nothing.
    cid_path = dev_test.path_to_test_cid("icd_customers_excel.xls")
    customers_cid = interface.Cid(cid_path)
    data_path = dev_test.path_to_test_data("valid_customers.xls")
    with validio.Reader(customers_cid, data_path) as rows_reader:
        rows_reader.validate_rows()
def test_can_read_cid_with_plugins(self):
    # Processing a CID that relies on plugins has to give exit code 0
    # when the plugins folder is passed using --plugins.
    cid_path = dev_test.path_to_test_cid('customers_with_plugins.ods')
    plugins_path = dev_test.path_to_test_plugins()
    arguments = [
        'test_can_read_cid_with_plugins',
        '--plugins', plugins_path,
        cid_path,
    ]
    self.assertEqual(0, applications.process(arguments))
def test_can_read_excel_and_create_data_format_delimited(self):
    # After reading the CID from Excel, its data format has to be
    # "delimited" with a header of 1.
    cid = interface.Cid()
    cid_path = dev_test.path_to_test_cid("icd_customers.xls")
    cid.read(cid_path, rowio.excel_rows(cid_path))
    data_format = cid._data_format
    self.assertEqual(data_format.format, "delimited")
    self.assertEqual(data_format.header, 1)
def setUp(self):
    """
    Create a cutplace application using the customers CID and remember
    the paths of the data files used by the tests.
    """
    self._cutplace_app = applications.CutplaceApp()
    self._cutplace_app.set_cid_from_path(
        dev_test.path_to_test_cid('customers.ods'))
    self._valid_customers_csv_path = dev_test.path_to_test_data(
        'valid_customers.csv')
    # Points to an ODS file, so presumably the data are "broken" only
    # in the sense that they are not CSV.
    self._broken_customers_non_csv_path = dev_test.path_to_test_data(
        'valid_customers.ods')
def test_can_validate_proper_csv_with_plugins(self):
    # Validating the valid customers CSV against a CID that relies on
    # plugins has to give exit code 0.
    cid_path = dev_test.path_to_test_cid('customers_with_plugins.ods')
    csv_path = dev_test.path_to_test_data('valid_customers.csv')
    plugins_path = dev_test.path_to_test_plugins()
    arguments = [
        'test_can_validate_proper_csv_with_plugins',
        '--plugins', plugins_path,
        cid_path,
        csv_path,
    ]
    self.assertEqual(0, applications.process(arguments))
def test_fails_on_invalid_csv_source_file_with_not_observed_count_expression(
        self):
    # Validating the rows must pass; the CheckError is expected only
    # when closing the reader.
    customers_cid = interface.Cid(
        dev_test.path_to_test_cid("icd_customers.xls"))
    broken_data_path = dev_test.path_to_test_data(
        "broken_customers_with_too_many_branches.csv")
    rows_reader = validio.Reader(customers_cid, broken_data_path)
    rows_reader.validate_rows()
    self.assertRaises(errors.CheckError, rows_reader.close)
def test_can_handle_all_field_formats_from_excel(self):
    # The first five fields of the CID with all types have to use the
    # field format classes listed below, in that order.
    cid = interface.Cid()
    cid_path = dev_test.path_to_test_cid("alltypes.xls")
    cid.read(cid_path, rowio.excel_rows(cid_path))
    expected_format_classes = (
        fields.IntegerFieldFormat,
        fields.TextFieldFormat,
        fields.ChoiceFieldFormat,
        fields.DateTimeFieldFormat,
        fields.DecimalFieldFormat,
    )
    for field_index, expected_class in enumerate(expected_format_classes):
        actual_format = cid.field_formats[field_index]
        self.assertTrue(isinstance(actual_format, expected_class))
def test_fails_on_non_existent_data(self):
    # A missing data file has to result in EnvironmentError on
    # Python 2 and in IOError on any other Python version.
    cid_path = dev_test.path_to_test_cid('customers.xls')
    error_class = EnvironmentError if six.PY2 else IOError
    process_arguments = [
        'test_fails_on_non_existent_data',
        cid_path,
        'no_such_data.csv',
    ]
    self.assertRaises(error_class, applications.process, process_arguments)
def test_can_handle_checks_from_excel(self):
    # The first check of the customers CID has to be an IsUniqueCheck
    # and the second one a DistinctCountCheck.
    cid = interface.Cid()
    cid_path = dev_test.path_to_test_cid("customers.xls")
    cid.read(cid_path, rowio.excel_rows(cid_path))

    first_check = cid.check_for(cid.check_names[0])
    self.assertTrue(isinstance(first_check, checks.IsUniqueCheck))

    second_check = cid.check_for(cid.check_names[1])
    self.assertTrue(isinstance(second_check, checks.DistinctCountCheck))
def test_can_read_delimited_rows(self):
    # TODO: either get rid of the CID and move it to test_iotools or use validate.Reader and move it to test_validate.
    # Read the valid customers CSV using the data format of the CID and
    # check the contents of the first row.
    cid_path = dev_test.path_to_test_cid("icd_customers.xls")
    customers_cid = interface.Cid(cid_path)
    data_path = dev_test.path_to_test_data("valid_customers.csv")
    rows = rowio.delimited_rows(data_path, customers_cid._data_format)
    expected_first_row = [
        '38000', '23', 'John', 'Doe', 'male', '08.03.1957']
    self.assertEqual(next(rows), expected_first_row)
def test_can_read_fixed_rows(self):
    # Read all rows of the fixed format customers data and check that
    # there is at least one row and that each row except the last is
    # non-empty and has as many items as the row after it.
    cid_path = dev_test.path_to_test_cid('customers_fixed.ods')
    customer_cid = interface.Cid(cid_path)
    fixed_path = dev_test.path_to_test_data('valid_customers_fixed.txt')
    field_names_and_lengths = interface.field_names_and_lengths(customer_cid)
    rows = list(
        rowio.fixed_rows(
            fixed_path, customer_cid.data_format.encoding,
            field_names_and_lengths))
    self.assertNotEqual(0, len(rows))
    # Walk through pairs of adjacent rows instead of juggling indices.
    for row, next_row in zip(rows, rows[1:]):
        self.assertNotEqual(0, len(row))
        self.assertEqual(len(row), len(next_row))
def test_fails_on_delimited_with_unterminated_quote(self):
    # Reading a delimited file with an unterminated quote has to raise
    # a DataFormatError whose message explains that the file cannot be
    # parsed.
    cid_path = dev_test.path_to_test_cid('customers.ods')
    customer_cid = interface.Cid(cid_path)
    broken_delimited_path = dev_test.path_to_test_data(
        'broken_customers_with_unterminated_quote.csv')
    try:
        # Consume all rows because the rows are wrapped in list(),
        # which suggests they are produced lazily.
        list(
            rowio.delimited_rows(
                broken_delimited_path, customer_cid.data_format))
    except errors.DataFormatError as error:
        error_message = '%s' % error
        self.assertTrue(
            'cannot parse delimited file' in error_message,
            'error_message=%r' % error_message)
    else:
        # Without this the test would pass silently when the broken
        # file is accepted.
        self.fail(
            'reading a delimited file with an unterminated quote '
            'must raise DataFormatError')
def test_can_handle_all_field_formats_from_excel(self):
    # Read the CID with all types from Excel and check that the field
    # formats 0 to 4 are instances of the expected classes.
    cid = interface.Cid()
    cid_path = dev_test.path_to_test_cid("alltypes.xls")
    cid.read(cid_path, rowio.excel_rows(cid_path))
    expected_classes = [
        fields.IntegerFieldFormat,
        fields.TextFieldFormat,
        fields.ChoiceFieldFormat,
        fields.DateTimeFieldFormat,
        fields.DecimalFieldFormat,
    ]
    for index, expected_class in enumerate(expected_classes):
        self.assertTrue(
            isinstance(cid.field_formats[index], expected_class))
def test_can_read_fields_from_excel(self):
    # Check names and formats of the fields 0 to 4 after reading the
    # CID from an Excel file.
    cid = interface.Cid()
    excel_path = dev_test.path_to_test_cid('cid_customers.xls')
    cid.read(excel_path, rowio.excel_rows(excel_path))
    field_names = cid.field_names
    field_formats = cid.field_formats

    # Field 0: customer_id
    self.assertEqual(field_names[0], 'customer_id')
    self.assertTrue(
        isinstance(field_formats[0], fields.IntegerFieldFormat))

    # Field 1: surname
    self.assertEqual(field_names[1], 'surname')
    self.assertTrue(isinstance(field_formats[1], fields.TextFieldFormat))
    self.assertEqual(
        field_formats[1].length.items, ranges.Range('...60').items)

    # Field 2: first_name, which may be empty
    self.assertEqual(field_names[2], 'first_name')
    self.assertTrue(isinstance(field_formats[2], fields.TextFieldFormat))
    self.assertEqual(
        field_formats[2].length.items, ranges.Range('...60').items)
    self.assertTrue(field_formats[2].is_allowed_to_be_empty)

    # Field 3: date_of_birth
    self.assertEqual(field_names[3], 'date_of_birth')
    self.assertTrue(
        isinstance(field_formats[3], fields.DateTimeFieldFormat))

    # Field 4: gender, which may be empty
    self.assertEqual(field_names[4], 'gender')
    self.assertTrue(isinstance(field_formats[4], fields.ChoiceFieldFormat))
    self.assertTrue(field_formats[4].is_allowed_to_be_empty)
def _build_and_validate_many_customers():
    """
    Build a CSV file with generated customers and validate it, first by
    using the API and then by using the command line application.

    Raise `ValueError` if the command line application does not exit
    with code 0.
    """
    cid_path = dev_test.path_to_test_cid("customers.ods")
    # TODO: Write to 'build/many_customers.csv'
    data_path = dev_test.path_to_test_data("lots_of_customers.csv")
    _build_lots_of_customers_csv(data_path, 50)

    # Validate the data using the API, so in case of errors we get
    # specific information.
    with validio.Reader(interface.Cid(cid_path), data_path) as reader:
        reader.validate_rows()

    # Validate the data using the command line application in order to
    # use the whole tool chain from an end user's point of view.
    exit_code = applications.main(["test_performance.py", cid_path, data_path])
    if exit_code == 0:
        return
    raise ValueError(
        "exit code of performance test must be 0 but is %d" % exit_code)
def test_can_read_csv_cid(self):
    # Convert the ODS CID to a temporary CSV CID and check that the
    # latter can be read.
    source_ods_cid_path = dev_test.path_to_test_cid('customers.ods')
    target_csv_cid_path = dev_test.path_to_test_cid('customers.csv')
    _ods.toCsv(source_ods_cid_path, target_csv_cid_path)
    try:
        self._test_can_read_cid('csv')
    finally:
        # Remove the generated CSV even if the test failed, so it does
        # not linger among the test CIDs.
        os.remove(target_csv_cid_path)
def test_can_open_and_validate_fixed(self):
    # Validate the fixed format customers data against the fixed
    # format CID; the test passes if validate_rows() raises nothing.
    cid_path = dev_test.path_to_test_cid("customers_fixed.xls")
    fixed_cid = interface.Cid(cid_path)
    data_path = dev_test.path_to_test_data("valid_customers_fixed.txt")
    with validio.Reader(fixed_cid, data_path) as rows_reader:
        rows_reader.validate_rows()
def test_can_deal_with_broken_cid(self):
    # A CID with a syntax error has to result in exit code 1.
    cid_path = dev_test.path_to_test_cid('broken_syntax_error.ods')
    exit_code = applications.main(['test', cid_path])
    self.assertEqual(1, exit_code)
def _test_can_read_cid(self, suffix):
    """
    Check that processing the test CID ``customers.<suffix>`` gives
    exit code 0.
    """
    cid_name = 'customers.' + suffix
    program_name = 'test_can_read_valid_' + suffix + '_cid'
    arguments = [program_name, dev_test.path_to_test_cid(cid_name)]
    self.assertEqual(0, applications.process(arguments))
def test_can_deal_with_broken_data(self):
    # Validating broken data against the customers CID has to result
    # in exit code 1.
    arguments = [
        'test',
        dev_test.path_to_test_cid('customers.ods'),
        dev_test.path_to_test_data('broken_customers.csv'),
    ]
    exit_code = applications.main(arguments)
    self.assertEqual(1, exit_code)
def test_can_read_cid(self):
    # Passing only the customers CID has to result in exit code 0.
    arguments = ['test', dev_test.path_to_test_cid('customers.ods')]
    exit_code = applications.main(arguments)
    self.assertEqual(0, exit_code)
def test_can_validate_proper_data(self):
    # Validating the valid customers CSV against the customers CID has
    # to result in exit code 0.
    arguments = [
        'test',
        dev_test.path_to_test_cid('customers.ods'),
        dev_test.path_to_test_data('valid_customers.csv'),
    ]
    exit_code = applications.main(arguments)
    self.assertEqual(0, exit_code)
def _test_can_read_cid(self, suffix):
    """
    Check that processing the test CID ``cid_customers.<suffix>`` gives
    exit code 0.
    """
    cid_name = 'cid_customers.' + suffix
    program_name = 'test_can_read_valid_' + suffix + '_cid'
    arguments = [program_name, dev_test.path_to_test_cid(cid_name)]
    self.assertEqual(0, applications.process(arguments))
def test_can_read_csv_cid(self):
    # Convert the ODS CID to a temporary CSV CID and check that the
    # latter can be read.
    source_ods_cid_path = dev_test.path_to_test_cid('cid_customers.ods')
    target_csv_cid_path = dev_test.path_to_test_cid('cid_customers.csv')
    _ods.to_csv(source_ods_cid_path, target_csv_cid_path)
    try:
        self._test_can_read_cid('csv')
    finally:
        # Remove the generated CSV even if the test failed, so it does
        # not linger among the test CIDs.
        os.remove(target_csv_cid_path)
def test_can_validate_proper_csv(self):
    # Validating the valid customers CSV against the Excel CID has to
    # give exit code 0.
    arguments = [
        'test_can_validate_proper_csv',
        dev_test.path_to_test_cid('customers.xls'),
        dev_test.path_to_test_data('valid_customers.csv'),
    ]
    self.assertEqual(0, applications.process(arguments))
def setUp(self):
    """
    Remember the path of the customers CID, the CID read from it and
    the path of the valid customers data.
    """
    cid_path = dev_test.path_to_test_cid("icd_customers.xls")
    self._cid_path = cid_path
    self._cid = interface.Cid(cid_path)
    self._data_path = dev_test.path_to_test_data("valid_customers.csv")