def test_can_handle_checks_from_excel(self):
     """
     Read the customers CID from Excel and ensure that its first check is an
     :py:class:`checks.IsUniqueCheck`.
     """
     cid = interface.Cid()
     excel_cid_path = dev_test.CID_CUSTOMERS_XLS_PATH
     cid.read(excel_cid_path, rowio.excel_rows(excel_cid_path))
     first_check = cid.check_for(cid.check_names[0])
     # assertIsInstance() reports the actual type on failure.
     self.assertIsInstance(first_check, checks.IsUniqueCheck)
 def test_can_read_fields_from_excel(self):
     """
     Read the customers CID from Excel and check names, formats, length
     limits and "allowed to be empty" flags of its first 5 fields.
     """
     cid = interface.Cid()
     excel_cid_path = dev_test.path_to_test_cid('cid_customers.xls')
     cid.read(excel_cid_path, rowio.excel_rows(excel_cid_path))
     names = cid.field_names
     formats = cid.field_formats

     # Field 0: customer_id
     self.assertEqual(names[0], 'customer_id')
     self.assertIsInstance(formats[0], fields.IntegerFieldFormat)

     # Field 1: surname
     self.assertEqual(names[1], 'surname')
     self.assertIsInstance(formats[1], fields.TextFieldFormat)
     self.assertEqual(formats[1].length.items, ranges.Range('...60').items)

     # Field 2: first_name
     self.assertEqual(names[2], 'first_name')
     self.assertIsInstance(formats[2], fields.TextFieldFormat)
     self.assertEqual(formats[2].length.items, ranges.Range('...60').items)
     self.assertTrue(formats[2].is_allowed_to_be_empty)

     # Field 3: date_of_birth
     self.assertEqual(names[3], 'date_of_birth')
     self.assertIsInstance(formats[3], fields.DateTimeFieldFormat)

     # Field 4: gender
     self.assertEqual(names[4], 'gender')
     self.assertIsInstance(formats[4], fields.ChoiceFieldFormat)
     self.assertTrue(formats[4].is_allowed_to_be_empty)
Example #3
0
 def test_fails_on_csv_source_file_with_more_elements_than_expected(self):
     """
     Validating ``broken_customers_more_elements.csv`` against the
     customers CID must raise :py:exc:`errors.DataError`.
     """
     customers_cid = interface.Cid(
         dev_test.path_to_test_cid("icd_customers.xls"))
     broken_data_path = dev_test.path_to_test_data(
         "broken_customers_more_elements.csv")
     with validio.Reader(customers_cid, broken_data_path) as reader:
         with self.assertRaises(errors.DataError):
             reader.validate_rows()
Example #4
0
 def test_fails_on_invalid_csv_source_file_with_not_observed_count_expression(
         self):
     """
     Validating the rows of ``broken_customers_with_too_many_branches.csv``
     works, but closing the reader must raise :py:exc:`errors.CheckError`.
     """
     customers_cid = interface.Cid(
         dev_test.path_to_test_cid("icd_customers.xls"))
     broken_data_path = dev_test.path_to_test_data(
         "broken_customers_with_too_many_branches.csv")
     reader = validio.Reader(customers_cid, broken_data_path)
     reader.validate_rows()
     # The error is expected only when the reader is closed.
     with self.assertRaises(errors.CheckError):
         reader.close()
Example #5
0
 def test_fails_on_delimited_with_unterminated_quote(self):
     """
     If reading a delimited file with an unterminated quote raises
     :py:exc:`errors.DataFormatError`, its message must mention that the
     delimited file cannot be parsed.
     """
     cid = interface.Cid(dev_test.CID_CUSTOMERS_ODS_PATH)
     broken_path = dev_test.path_to_test_data('broken_customers_with_unterminated_quote.csv')
     # NOTE(review): the test also passes when no error is raised at all;
     # confirm whether this leniency is intended.
     try:
         broken_rows = rowio.delimited_rows(broken_path, cid.data_format)
         list(broken_rows)
     except errors.DataFormatError as error:
         error_message = '%s' % error
         has_expected_text = 'cannot parse delimited file' in error_message
         self.assertTrue(has_expected_text, 'error_message=%r' % error_message)
Example #6
0
 def test_can_skip_empty_rows(self):
     """
     A CID whose rows start with an empty row and a row holding only an
     empty item can still be read; the data format ends up as "delimited".
     """
     cid_rows = [
         [],
         [''],
         ['d', 'format', 'delimited'],
         ['f', 'some'],
     ]
     cid = interface.Cid()
     cid.read('inline', cid_rows)
     self.assertEqual(cid._data_format.format, "delimited")
Example #7
0
    def validate(self):
        """
        Validate the CID stored in ``self.cid_path`` and, provided it could
        be read and ``self.data_path`` is not empty, the data file against it.

        Progress and results are appended to the validation result text; the
        number of rows validated so far is shown in the status line. Errors
        while reading the CID or validating the data are logged to the result
        text instead of being raised.
        """
        def add_log_line(line):
            # Enable the result text only while appending to it, then
            # disable it again even if appending fails.
            self._validation_result_text.config(state=NORMAL)
            try:
                self._validation_result_text.insert(END, line + '\n')
                self._validation_result_text.see(END)
            finally:
                self._validation_result_text.config(state=DISABLED)

        def add_log_error_line(line):
            add_log_line('ERROR: %s' % line)

        def show_status_line(line):
            self._validation_status_text.set(line)
            # Update the GUI now because the validation loop keeps running.
            self.master.update()

        assert self.cid_path != ''

        cid_name = os.path.basename(self.cid_path)
        self.clear_validation_result_text()
        add_log_line('%s: validating' % cid_name)
        self.enable_usable_widgets()
        cid = None
        try:
            cid = interface.Cid(self.cid_path)
            add_log_line('%s: ok' % cid_name)
        except errors.InterfaceError as error:
            add_log_error_line(error)
        except Exception as error:
            add_log_error_line('cannot read CID: %s' % error)

        if (cid is not None) and (self.data_path != ''):
            try:
                data_name = os.path.basename(self.data_path)
                add_log_line('%s: validating' % data_name)
                # Use the reader as context manager so it is always closed;
                # errors raised on close are logged by the handler below.
                with validio.Reader(cid,
                                    self.data_path,
                                    on_error='yield') as validator:
                    show_status_line('Validation started')
                    last_update_time = time.time()
                    for row_or_error in validator.rows():
                        now = time.time()
                        # Refresh the status line at most every 3 seconds.
                        if (now - last_update_time) > 3:
                            last_update_time = now
                            show_status_line(
                                '%d rows validated' %
                                (validator.accepted_rows_count +
                                 validator.rejected_rows_count))
                        if isinstance(row_or_error, errors.CutplaceError):
                            add_log_error_line(row_or_error)
                    show_status_line('%d rows validated - finished' %
                                     (validator.accepted_rows_count +
                                      validator.rejected_rows_count))
                    add_log_line('%s: %d rows accepted, %d rows rejected' %
                                 (data_name, validator.accepted_rows_count,
                                  validator.rejected_rows_count))
            except Exception as error:
                add_log_error_line('cannot validate data: %s' % error)
Example #8
0
 def test_can_handle_checks_from_excel(self):
     """
     Read ``customers.xls`` and ensure that its first two checks are an
     :py:class:`checks.IsUniqueCheck` and a
     :py:class:`checks.DistinctCountCheck`.
     """
     cid = interface.Cid()
     excel_cid_path = dev_test.path_to_test_cid("customers.xls")
     cid.read(excel_cid_path, rowio.excel_rows(excel_cid_path))
     # assertIsInstance() reports the actual type on failure.
     self.assertIsInstance(
         cid.check_for(cid.check_names[0]), checks.IsUniqueCheck)
     self.assertIsInstance(
         cid.check_for(cid.check_names[1]), checks.DistinctCountCheck)
Example #9
0
 def test_can_read_delimited_rows(self):
     """
     The first row read from ``valid_customers.csv`` using the data format
     of the customers CID has the expected items.
     """
     # TODO: either get rid of the CID and move it to test_iotools or use validate.Reader and move it to test_validate.
     cid_path = dev_test.path_to_test_cid("icd_customers.xls")
     customers_cid = interface.Cid(cid_path)
     data_path = dev_test.path_to_test_data("valid_customers.csv")
     rows = rowio.delimited_rows(data_path, customers_cid._data_format)
     expected_first_row = [
         '38000', '23', 'John', 'Doe', 'male', '08.03.1957']
     self.assertEqual(next(rows), expected_first_row)
Example #10
0
    def __init__(self, cid_or_path):
        """
        Set up the validator for ``cid_or_path``, which is either the path
        of a CID to read or an already existing CID whose data format has to
        be valid.
        """
        assert cid_or_path is not None

        is_path = isinstance(cid_or_path, six.string_types)
        if not is_path:
            # An existing CID is used as is, provided its data format is valid.
            self._cid = cid_or_path
            assert self._cid.data_format.is_valid, \
                'DataFormat.validate() must be called before using a CID for validation'
        else:
            self._cid = interface.Cid(cid_or_path)
        self._expected_item_count = len(self._cid.field_formats)
        self._location = None
        self._is_closed = False
Example #11
0
 def set_cid_from_path(self, cid_path):
     """
     Read the :py:class:`cutplace.interface.Cid` stored in ``cid_path`` and
     make it the one used by this application.

     ``self.cid`` and ``self.cid_path`` are set only after the CID has been
     read.
     """
     assert cid_path is not None
     cid = interface.Cid()
     _log.info('read CID from "%s"', cid_path)
     cid.read(cid_path, rowio.auto_rows(cid_path))
     self.cid, self.cid_path = cid, cid_path
 def test_can_read_delimited_rows(self):
     """
     The first two rows read from the customers CSV using the data format
     of the customers CID are the title row and the expected first data row.
     """
     # TODO: either get rid of the CID and move it to test_iotools or use validate.Reader and move it to test_validate.
     customers_cid = interface.Cid(dev_test.CID_CUSTOMERS_ODS_PATH)
     rows = rowio.delimited_rows(
         dev_test.CUSTOMERS_CSV_PATH, customers_cid.data_format)

     expected_title_row = [
         'customer_id', 'surname', 'first_name', 'born', 'gender']
     self.assertEqual(next(rows), expected_title_row)

     expected_first_data_row = [
         '1', 'Beck', 'Tyler', '1995-11-15', 'male']
     self.assertEqual(next(rows), expected_first_data_row)
Example #13
0
 def test_can_read_fixed_rows(self):
     """
     Reading ``valid_customers_fixed.txt`` yields at least one row, and any
     two adjacent rows are non empty and have the same number of items.
     """
     customer_cid = interface.Cid(
         dev_test.path_to_test_cid('customers_fixed.ods'))
     data_path = dev_test.path_to_test_data('valid_customers_fixed.txt')
     names_and_lengths = interface.field_names_and_lengths(customer_cid)
     encoding = customer_cid.data_format.encoding
     rows = list(rowio.fixed_rows(data_path, encoding, names_and_lengths))
     self.assertNotEqual(0, len(rows))
     # Pair each row with its successor.
     for row, next_row in zip(rows, rows[1:]):
         self.assertNotEqual(0, len(row))
         self.assertEqual(len(row), len(next_row))
Example #14
0
 def test_can_handle_all_field_formats_from_excel(self):
     """
     Read ``alltypes.xls`` and ensure that its first 5 fields use the
     integer, text, choice, date/time and decimal field format.
     """
     cid = interface.Cid()
     excel_cid_path = dev_test.path_to_test_cid("alltypes.xls")
     cid.read(excel_cid_path, rowio.excel_rows(excel_cid_path))
     expected_format_classes = [
         fields.IntegerFieldFormat,
         fields.TextFieldFormat,
         fields.ChoiceFieldFormat,
         fields.DateTimeFieldFormat,
         fields.DecimalFieldFormat,
     ]
     for field_index, expected_class in enumerate(expected_format_classes):
         # assertIsInstance() reports the actual type on failure.
         self.assertIsInstance(
             cid.field_formats[field_index], expected_class)
Example #15
0
    def test_can_create_decimal_field(self):
        """
        Every SQL field built from a CID with a single ``Decimal`` field has
        the type ``decimal`` and is not marked as "not null".
        """
        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'latitude', '1.5853', '', '', 'Decimal'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers')

        for sql_field in factory.sql_fields():
            # Each SQL field has to consist of exactly 6 items.
            _, sql_type, _, _, not_null, _ = sql_field
            self.assertEqual(sql_type, 'decimal')
            self.assertEqual(not_null, False)
Example #16
0
    def test_can_create_int_field(self):
        """
        Every SQL field built from a CID with a single ``Integer`` field
        limited to ``0...99999`` has ``'int'`` as item 1 and ``False`` as
        item 4.
        """
        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'customer_id', '12345', '', '', 'Integer', '0...99999'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers')

        # Item 1 presumably is the SQL type and item 4 the "not null" flag.
        for sql_field in factory.sql_fields():
            self.assertEqual(sql_field[1], 'int')
            self.assertEqual(sql_field[4], False)
Example #17
0
    def test_can_handle_oracle_sql_dialect(self):
        """
        With ``sql.PL_SQL_DIALECT``, decimal and integer fields of various
        ranges map to ``number`` or ``int`` and the text field maps to
        ``varchar2``.
        """
        def integer_range_up_to(upper_limit):
            return '0...%s' % six.text_type(upper_limit)

        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'latitude', '1.5853', '', '', 'Decimal'],
            ['F', 'small', '1', '', '', 'Integer',
             integer_range_up_to(sql.MAX_SMALLINT)],
            ['F', 'int', '1', '', '', 'Integer',
             integer_range_up_to(sql.MAX_SMALLINT + 1)],
            ['F', 'big', '1', '', '', 'Integer',
             integer_range_up_to(sql.MAX_INTEGER + 1)],
            ['F', 'decimal', '1', '', '', 'Integer',
             integer_range_up_to(sql.MAX_BIGINT + 1)],
            ['F', 'surname', 'Doe', '', '1...60', 'Text'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers', sql.PL_SQL_DIALECT)

        sql_fields = list(factory.sql_fields())
        # Expected (item 1, item 4) of the first 5 SQL fields in CID order.
        expected_items = [
            ('number', False),
            ('int', False),
            ('int', False),
            ('number', False),
            ('number', False),
        ]
        for field_index, (expected_1, expected_4) in enumerate(expected_items):
            self.assertEqual(sql_fields[field_index][1], expected_1)
            self.assertEqual(sql_fields[field_index][4], expected_4)
        # For the text field only item 1 is checked.
        self.assertEqual(sql_fields[5][1], 'varchar2')
Example #18
0
    def test_can_create_char_field(self):
        """
        Every SQL field built from a CID with a single ``Text`` field of
        length ``1...60`` has ``'varchar'`` as item 1, ``60`` as item 2 and
        ``True`` as item 4.
        """
        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'surname', 'Doe', 'x', '1...60', 'Text'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers')

        # Items presumably are: 1=SQL type, 2=length, 4="not null" flag.
        for sql_field in factory.sql_fields():
            self.assertEqual(sql_field[1], 'varchar')
            self.assertEqual(sql_field[2], 60)
            self.assertEqual(sql_field[4], True)
Example #19
0
 def test_can_handle_all_field_formats_from_array(self):
     """
     Read a CID from inline rows and ensure that its fields use the
     integer, choice, date/time, decimal and text field format; the last
     field does not specify any type at all.
     """
     cid_rows = [
         ['d', 'format', 'delimited'],
         ['f', 'int', '', '', '', 'Integer'],
         ['f', 'choice', '', '', '', 'Choice', 'x,y'],
         ['f', 'date', '', '', '', 'DateTime'],
         ['f', 'dec', '', '', '', 'Decimal', ''],
         ['f', 'text'],
     ]
     cid = interface.Cid()
     cid.read('inline', cid_rows)
     expected_format_classes = [
         fields.IntegerFieldFormat,
         fields.ChoiceFieldFormat,
         fields.DateTimeFieldFormat,
         fields.DecimalFieldFormat,
         fields.TextFieldFormat,
     ]
     for field_index, expected_class in enumerate(expected_format_classes):
         # assertIsInstance() reports the actual type on failure.
         self.assertIsInstance(
             cid.field_formats[field_index], expected_class)
Example #20
0
def _build_and_validate_many_customers():
    """
    Build ``lots_of_customers.csv`` and validate it against
    ``customers.ods``, first using the API and then using the command line
    application.

    Raise :py:exc:`ValueError` if the command line application exits with
    a code other than 0.
    """
    cid_path = dev_test.path_to_test_cid("customers.ods")
    # TODO: Write to 'build/many_customers.csv'
    data_path = dev_test.path_to_test_data("lots_of_customers.csv")
    _build_lots_of_customers_csv(data_path, 50)

    # Validate using the API first so errors yield specific information.
    cid = interface.Cid(cid_path)
    with validio.Reader(cid, data_path) as reader:
        reader.validate_rows()

    # Validate again using the command line application to cover the whole
    # tool chain from an end user's point of view.
    arguments = ["test_performance.py", cid_path, data_path]
    exit_code = applications.main(arguments)
    if exit_code == 0:
        return
    raise ValueError("exit code of performance test must be 0 but is %d" %
                     exit_code)
Example #21
0
    def test_can_create_date_field(self):
        """
        Every SQL field built from a CID with a single ``DateTime`` field
        has the type ``date`` and is not marked as "not null".
        """
        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'date_of_birth', '03.11.1969', '', '', 'DateTime',
             'DD.MM.YYYY'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers')

        for sql_field in factory.sql_fields():
            # Each SQL field has to consist of exactly 6 items.
            _, sql_type, _, _, not_null, _ = sql_field
            self.assertEqual(sql_type, 'date')
            self.assertEqual(not_null, False)
Example #22
0
    def test_can_create_sql_factory(self):
        """
        A :py:class:`sql.SqlFactory` created from a CID refers to a CID with
        the same field names.
        """
        cid_rows = [
            ['D', 'Format', 'delimited'],
            ['D', 'Line delimiter', 'any'],
            ['D', 'Item delimiter', ','],
            ['D', 'Quote character', '"'],
            ['D', 'Escape character', '\\'],
            ['D', 'Encoding', 'ISO-8859-1'],
            ['D', 'Allowed characters', '32:'],
            ['F', 'branch_id', '38123', '', '', 'RegEx'],
            ['F', 'customer_id', '12345', '', '', 'Integer', '0...99999'],
            ['F', 'first_name', 'John', 'X', '', 'Text'],
            ['F', 'surname', 'Doe', '', '1...60', 'Text'],
            ['F', 'gender', 'male', '', '', 'Choice', 'male, female, unknown'],
            ['F', 'date_of_birth', '03.11.1969', '', '', 'DateTime',
             'DD.MM.YYYY'],
        ]
        cid = interface.Cid()
        cid.read('customers', cid_rows)

        factory = sql.SqlFactory(cid, 'customers')
        factory_field_names = factory.cid._field_names
        self.assertEqual(cid.field_names, factory_field_names)
Example #23
0
def process(argv=None):
    """
    Perform the actions requested by the command line options ``argv``
    (``sys.argv`` if ``None``). In case of error, raise an appropriate
    :py:exc:`Exception`.

    Module :py:mod:`logging` has to be set up properly before calling this,
    for example by calling :py:func:`logging.basicConfig`.

    :return: 1 if ``argv`` requested to validate one or more files and at \
      least one of them contained rejected data; otherwise 0.
    """
    if argv is None:  # pragma: no cover
        argv = sys.argv
    assert argv

    app = CutplaceApp()
    app.set_options(argv)

    exit_code = 0
    if app.is_gui:
        # Pass only the first data path, if any, to the GUI.
        first_data_path = None
        if len(app.data_paths) >= 1:
            first_data_path = app.data_paths[0]
        gui.open_gui(app.cid_path, first_data_path)
    elif app.is_create_sql:
        sql.write_create(app.cid_path, interface.Cid())
    elif app.data_paths:
        for data_path in app.data_paths:
            try:
                app.validate(data_path)
            except (EnvironmentError, OSError) as error:
                message = _("cannot read data file %r: %s") % (data_path, error)
                raise EnvironmentError(message)
        if not app.all_validations_were_ok:
            exit_code = 1
    return exit_code
Example #24
0
 def test_fails_on_invalid_row_typ(self):
     """Reading a CID row starting with ``'x'`` must raise :py:exc:`errors.InterfaceError`."""
     cid = interface.Cid()
     broken_cid_rows = [['x']]
     with self.assertRaises(errors.InterfaceError):
         cid.read('inline', broken_cid_rows)
Example #25
0
 def test_fails_on_python_keyword_as_field_name(self):
     """Reading a CID with a field named ``class`` must raise :py:exc:`errors.InterfaceError`."""
     cid = interface.Cid()
     broken_cid_rows = [
         ['d', 'format', 'delimited'],
         ['f', 'class', '38000', '', '5'],
     ]
     with self.assertRaises(errors.InterfaceError):
         cid.read('inline', broken_cid_rows)
Example #26
0
 def test_can_create_empty_cid(self):
     """
     The location of a CID created without arguments refers to a file whose
     name without suffix is ``test_interface``.
     """
     empty_cid = interface.Cid()
     location_file_name = os.path.basename(empty_cid._location.file_path)
     cid_name = os.path.splitext(location_file_name)[0]
     self.assertEqual('test_interface', cid_name)
Example #27
0
 def test_fails_on_missing_data_format_property_name(self):
     """Reading a data format row consisting only of ``'d'`` must raise :py:exc:`errors.InterfaceError`."""
     cid = interface.Cid()
     broken_cid_rows = [
         ['d', 'format', 'delimited'],
         ['d'],
     ]
     with self.assertRaises(errors.InterfaceError):
         cid.read('inline', broken_cid_rows)
Example #28
0
 def test_fails_on_invalid_csv_source_file_with_duplicates(self):
     """
     Validating ``broken_customers_with_duplicates.csv`` against the
     customers CID must raise :py:exc:`errors.CheckError`.
     """
     customers_cid = interface.Cid(dev_test.path_to_test_cid("icd_customers.xls"))
     broken_data_path = dev_test.path_to_test_data("broken_customers_with_duplicates.csv")
     with validio.Reader(customers_cid, broken_data_path) as reader:
         with self.assertRaises(errors.CheckError):
             reader.validate_rows()
Example #29
0
 def test_can_open_and_validate_fixed_source_file(self):
     """
     Validating ``valid_customers_fixed.txt`` against ``customers_fixed.xls``
     must not raise any error.
     """
     fixed_cid = interface.Cid(dev_test.path_to_test_cid("customers_fixed.xls"))
     fixed_data_path = dev_test.path_to_test_data("valid_customers_fixed.txt")
     with validio.Reader(fixed_cid, fixed_data_path) as reader:
         reader.validate_rows()
Example #30
0
 def setUp(self):
     """
     Read the CID from ``icd_customers.xls`` and remember its path and the
     path of ``valid_customers.csv`` for the tests.
     """
     cid_path = dev_test.path_to_test_cid("icd_customers.xls")
     self._cid_path = cid_path
     self._cid = interface.Cid(cid_path)
     self._data_path = dev_test.path_to_test_data("valid_customers.csv")