def test_can_read_fields_from_excel(self):
    # Read the CID stored in 'cid_customers.xls' and check the name and
    # format class of its first five fields, plus length and "may be
    # empty" settings for the fields where the test expects them.
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid('cid_customers.xls')
    cid_reader.read(source_path, rowio.excel_rows(source_path))

    # assertIsInstance() reports the offending object and the expected
    # class on failure instead of a bare "False is not true".
    self.assertEqual(cid_reader.field_names[0], 'customer_id')
    self.assertIsInstance(cid_reader.field_formats[0], fields.IntegerFieldFormat)

    self.assertEqual(cid_reader.field_names[1], 'surname')
    self.assertIsInstance(cid_reader.field_formats[1], fields.TextFieldFormat)
    self.assertEqual(
        cid_reader.field_formats[1].length.items, ranges.Range('...60').items)

    self.assertEqual(cid_reader.field_names[2], 'first_name')
    self.assertIsInstance(cid_reader.field_formats[2], fields.TextFieldFormat)
    self.assertEqual(
        cid_reader.field_formats[2].length.items, ranges.Range('...60').items)
    self.assertTrue(cid_reader.field_formats[2].is_allowed_to_be_empty)

    self.assertEqual(cid_reader.field_names[3], 'date_of_birth')
    self.assertIsInstance(cid_reader.field_formats[3], fields.DateTimeFieldFormat)

    self.assertEqual(cid_reader.field_names[4], 'gender')
    self.assertIsInstance(cid_reader.field_formats[4], fields.ChoiceFieldFormat)
    self.assertTrue(cid_reader.field_formats[4].is_allowed_to_be_empty)
def test_can_read_excel_and_create_data_format_delimited(self):
    # Reading the customers CID from Excel must result in a data format
    # of type "delimited" with a header of 1.
    xls_path = dev_test.CID_CUSTOMERS_XLS_PATH
    cid = interface.Cid()
    cid.read(xls_path, rowio.excel_rows(xls_path))
    for attribute_name, expected_value in (('format', "delimited"), ('header', 1)):
        self.assertEqual(getattr(cid._data_format, attribute_name), expected_value)
def test_can_extract_all_excel_field_types(self):
    # Every row of 'fieldtypes.xls' must have exactly 3 cells. Starting
    # with the second row, the value read from the 2nd cell must equal the
    # value in the 3rd cell.
    field_types_path = dev_test.path_to_test_data('fieldtypes.xls')
    for row_index, cells in enumerate(rowio.excel_rows(field_types_path)):
        self.assertEqual(3, len(cells))
        if row_index < 1:
            # The first row is only checked for its number of cells.
            continue
        _, excel_value, cutplace_value = cells
        self.assertEqual(cutplace_value, excel_value)
def test_fails_on_excel_from_ods(self):
    # Reading an ODS file with the Excel reader must fail with a
    # DataFormatError located at R1C1.
    ods_path = dev_test.path_to_test_data('valid_customers.ods')
    try:
        # Consume all rows so a lazily evaluated reader actually runs.
        list(rowio.excel_rows(ods_path))
    except errors.DataFormatError as anticipated_error:
        dev_test.assert_fnmatches(
            self, str(anticipated_error), '* (R1C1): cannot read Excel file: *')
    else:
        self.fail()
def test_fails_on_excel_from_csv(self):
    # Reading a CSV file with the Excel reader must fail with a
    # DataFormatError located at R1C1.
    csv_path = dev_test.CUSTOMERS_CSV_PATH
    expected_message_pattern = '* (R1C1): cannot read Excel file: *'
    try:
        # Consume all rows so a lazily evaluated reader actually runs.
        list(rowio.excel_rows(csv_path))
    except errors.DataFormatError as anticipated_error:
        actual_message = str(anticipated_error)
        dev_test.assert_fnmatches(self, actual_message, expected_message_pattern)
    else:
        self.fail()
def test_can_handle_checks_from_excel(self):
    # Read the customers CID from Excel and expect its first check to be
    # an IsUniqueCheck.
    cid_reader = interface.Cid()
    source_path = dev_test.CID_CUSTOMERS_XLS_PATH
    cid_reader.read(source_path, rowio.excel_rows(source_path))
    first_check = cid_reader.check_for(cid_reader.check_names[0])
    # assertIsInstance() shows the actual check object when it fails.
    self.assertIsInstance(first_check, checks.IsUniqueCheck)
def test_can_read_excel_and_create_data_format_delimited(self):
    # Reading the CID in "icd_customers.xls" must result in a data format
    # of type "delimited" with a header of 1.
    xls_path = dev_test.path_to_test_cid("icd_customers.xls")
    cid = interface.Cid()
    excel_rows = rowio.excel_rows(xls_path)
    cid.read(xls_path, excel_rows)
    self.assertEqual(cid._data_format.format, "delimited")
    self.assertEqual(cid._data_format.header, 1)
def test_can_handle_all_field_formats_from_excel(self):
    # Read the CID in "alltypes.xls" and check that its first five fields
    # use the expected field format classes, in this order.
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid("alltypes.xls")
    cid_reader.read(source_path, rowio.excel_rows(source_path))
    expected_format_classes = (
        fields.IntegerFieldFormat,
        fields.TextFieldFormat,
        fields.ChoiceFieldFormat,
        fields.DateTimeFieldFormat,
        fields.DecimalFieldFormat,
    )
    for field_index, expected_class in enumerate(expected_format_classes):
        # assertIsInstance() names the actual object and expected class
        # on failure instead of only reporting "False is not true".
        self.assertIsInstance(cid_reader.field_formats[field_index], expected_class)
def test_can_handle_checks_from_excel(self):
    # Read the CID in "customers.xls" and expect its first two checks to
    # be an IsUniqueCheck followed by a DistinctCountCheck.
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid("customers.xls")
    cid_reader.read(source_path, rowio.excel_rows(source_path))
    expected_check_classes = (checks.IsUniqueCheck, checks.DistinctCountCheck)
    for check_index, expected_class in enumerate(expected_check_classes):
        actual_check = cid_reader.check_for(cid_reader.check_names[check_index])
        # assertIsInstance() shows the actual check object when it fails.
        self.assertIsInstance(actual_check, expected_class)
def write_create(cid_path, cid_reader):
    """
    Read the CID stored in the Excel file ``cid_path`` into ``cid_reader``
    and write the SQL create statement built from it to a file.

    The target file has the path of ``cid_path`` with the suffix replaced
    by ``_create.sql`` and is written using UTF-8.
    """
    # TODO: add option for different cid types
    cid_reader.read(cid_path, rowio.excel_rows(cid_path))
    # TODO: Add option to specify target folder for SQL files.
    create_path = os.path.splitext(cid_path)[0] + '_create.sql'
    _log.info('write SQL create statements to "%s"', create_path)
    # Pass only the file name without folders and suffix to the factory;
    # previously the whole path without suffix was passed.
    # NOTE(review): assumes the second argument of SqlFactory is the table
    # name, for which folder parts make no sense - confirm.
    table = os.path.splitext(os.path.basename(cid_path))[0]
    # TODO: Add option for encoding.
    with io.open(create_path, 'w', encoding='utf-8') as create_file:
        sql_factory = SqlFactory(cid_reader, table)
        create_file.write(sql_factory.create_table_statement())
def write_create(cid_path, cid_reader):
    """
    Read the CID stored in the Excel file ``cid_path`` into ``cid_reader``
    and write the SQL create statement built from it to a file.

    The target file has the path of ``cid_path`` with the suffix replaced
    by ``_create.sql`` and is written using UTF-8. The name passed to
    ``SqlFactory`` is the file name of ``cid_path`` without its suffix.
    """
    # TODO: Add option for different cid types.
    excel_rows = rowio.excel_rows(cid_path)
    cid_reader.read(cid_path, excel_rows)

    # TODO: Add option to specify target folder for SQL files.
    path_without_suffix = os.path.splitext(cid_path)[0]
    create_path = path_without_suffix + '_create.sql'
    _log.info('write SQL create statements to "%s"', create_path)

    # TODO: Add option for encoding.
    with io.open(create_path, 'w', encoding='utf-8') as create_file:
        cid_file_name = os.path.basename(cid_path)
        table = os.path.splitext(cid_file_name)[0]
        create_statement = SqlFactory(cid_reader, table).create_table_statement()
        create_file.write(create_statement)
def test_can_write_xlsx(self):
    # Write two rows to an XLSX file in the test build folder, read them
    # back with excel_rows() and expect each cell as text.
    rows_to_write = (
        ('a', 'b', 'c'),
        (1, 2, 3),
    )
    test_build_folder = dev_test.path_to_test_folder('build')
    _tools.mkdirs(test_build_folder)
    xlsx_path = os.path.join(test_build_folder, 'test_can_write_xlsx.xlsx')
    with rowio.XlsxRowWriter(xlsx_path) as xlsx_writer:
        for row_to_write in rows_to_write:
            xlsx_writer.write_row(row_to_write)

    rows_read_count = 0
    for row_index, row_read in enumerate(rowio.excel_rows(xlsx_path)):
        string_row_written = [six.text_type(item) for item in rows_to_write[row_index]]
        self.assertEqual(string_row_written, row_read)
        rows_read_count = row_index + 1
    # Without this check the test would pass even if the file read back
    # contained fewer rows than written, or none at all.
    self.assertEqual(len(rows_to_write), rows_read_count)
def test_can_handle_all_field_formats_from_excel(self):
    # Read the CID in "alltypes.xls" and check that its first five fields
    # use the expected field format classes.
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid("alltypes.xls")
    cid_reader.read(source_path, rowio.excel_rows(source_path))
    field_formats = cid_reader.field_formats
    # assertIsInstance() reports the actual object and the expected class
    # on failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(field_formats[0], fields.IntegerFieldFormat)
    self.assertIsInstance(field_formats[1], fields.TextFieldFormat)
    self.assertIsInstance(field_formats[2], fields.ChoiceFieldFormat)
    self.assertIsInstance(field_formats[3], fields.DateTimeFieldFormat)
    self.assertIsInstance(field_formats[4], fields.DecimalFieldFormat)
def _raw_rows(self):
    """
    Result of the ``rowio`` reader function matching the data format of
    ``self.cid``, applied to ``self._source_data_stream_or_path``.

    Raises ``AssertionError`` if the format is none of the formats
    handled here.
    """
    data_format = self.cid.data_format
    # Do not call this ``format`` to avoid shadowing the builtin.
    format_name = data_format.format
    if format_name == data.FORMAT_EXCEL:
        return rowio.excel_rows(self._source_data_stream_or_path, data_format.sheet)
    if format_name == data.FORMAT_DELIMITED:
        return rowio.delimited_rows(self._source_data_stream_or_path, data_format)
    if format_name == data.FORMAT_FIXED:
        return rowio.fixed_rows(
            self._source_data_stream_or_path, data_format.encoding,
            interface.field_names_and_lengths(self.cid), data_format.line_delimiter)
    if format_name == data.FORMAT_ODS:
        return rowio.ods_rows(self._source_data_stream_or_path, data_format.sheet)
    # Raise explicitly instead of ``assert False``: assert statements are
    # removed when running with ``python -O``, which would silently
    # return None for an unknown format.
    raise AssertionError('format=%r' % format_name)
def test_can_read_fields_from_excel(self):
    # Read the CID stored in 'cid_customers.xls' and check the name and
    # format class of its first five fields, plus length and "may be
    # empty" settings for the fields where the test expects them.
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid('cid_customers.xls')
    cid_reader.read(source_path, rowio.excel_rows(source_path))

    expected_names_and_classes = (
        ('customer_id', fields.IntegerFieldFormat),
        ('surname', fields.TextFieldFormat),
        ('first_name', fields.TextFieldFormat),
        ('date_of_birth', fields.DateTimeFieldFormat),
        ('gender', fields.ChoiceFieldFormat),
    )
    for field_index, (expected_name, expected_class) in enumerate(expected_names_and_classes):
        self.assertEqual(cid_reader.field_names[field_index], expected_name)
        # assertIsInstance() reports the actual object and the expected
        # class on failure instead of a bare "False is not true".
        self.assertIsInstance(cid_reader.field_formats[field_index], expected_class)

    # Fields 1 and 2 (surname, first_name) must have the length '...60'.
    for text_field_index in (1, 2):
        self.assertEqual(
            cid_reader.field_formats[text_field_index].length.items,
            ranges.Range('...60').items)

    # Fields 2 and 4 (first_name, gender) must be allowed to be empty.
    for optional_field_index in (2, 4):
        self.assertTrue(cid_reader.field_formats[optional_field_index].is_allowed_to_be_empty)
def test_can_handle_checks_from_excel(self):
    # Read the customers CID from Excel and expect the check registered
    # under the first check name to be an IsUniqueCheck.
    source_path = dev_test.CID_CUSTOMERS_XLS_PATH
    cid_reader = interface.Cid()
    cid_reader.read(source_path, rowio.excel_rows(source_path))
    first_check_name = cid_reader.check_names[0]
    # assertIsInstance() shows the actual check object when it fails,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(cid_reader.check_for(first_check_name), checks.IsUniqueCheck)
def test_can_read_excel_rows(self):
    # Pass the rows read from 'valid_customers.xls' to the data assertion
    # helper of this test case.
    customers_xls_path = dev_test.path_to_test_data('valid_customers.xls')
    customer_rows = rowio.excel_rows(customers_xls_path)
    self._assert_rows_contain_data(customer_rows)