def test_can_read_fields_from_excel(self):
    """
    Read the CID stored in ``cid_customers.xls`` and check name, format
    class and selected attributes of its first five fields.
    """
    cid_reader = interface.Cid()
    source_path = dev_test.path_to_test_cid('cid_customers.xls')
    cid_reader.read(source_path, rowio.excel_rows(source_path))

    # ``assertIsInstance`` reports the actual type on failure, unlike
    # ``assertTrue(isinstance(...))`` which only says "False is not true".
    self.assertEqual(cid_reader.field_names[0], 'customer_id')
    self.assertIsInstance(cid_reader.field_formats[0], fields.IntegerFieldFormat)

    self.assertEqual(cid_reader.field_names[1], 'surname')
    self.assertIsInstance(cid_reader.field_formats[1], fields.TextFieldFormat)
    self.assertEqual(cid_reader.field_formats[1].length.items, ranges.Range('...60').items)

    self.assertEqual(cid_reader.field_names[2], 'first_name')
    self.assertIsInstance(cid_reader.field_formats[2], fields.TextFieldFormat)
    self.assertEqual(cid_reader.field_formats[2].length.items, ranges.Range('...60').items)
    self.assertTrue(cid_reader.field_formats[2].is_allowed_to_be_empty)

    self.assertEqual(cid_reader.field_names[3], 'date_of_birth')
    self.assertIsInstance(cid_reader.field_formats[3], fields.DateTimeFieldFormat)

    self.assertEqual(cid_reader.field_names[4], 'gender')
    self.assertIsInstance(cid_reader.field_formats[4], fields.ChoiceFieldFormat)
    self.assertTrue(cid_reader.field_formats[4].is_allowed_to_be_empty)
def __init__(self, field_name, is_allowed_to_be_empty, length_text, rule, data_format, empty_value=None):
    """
    Set up the integer field format and derive ``self.valid_range`` from
    the declared length and rule.

    * length and rule: every non ``None`` limit of the rule must have a
      number of digits accepted by the length; the rule becomes the valid
      range.
    * only length: the valid range is derived from the length.
    * only rule: the rule is the valid range.
    * neither: ``ranges.DEFAULT_INTEGER_RANGE_TEXT`` is used.

    :raises errors.InterfaceError: if a limit of the rule does not fit \
        the length
    """
    super(IntegerFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length_text, rule, data_format, empty_value)
    is_fixed_format = (data_format.format == data.FORMAT_FIXED)

    has_length = (length_text is not None) and (length_text.strip() != '')
    if has_length:
        length = self.length
        if is_fixed_format:
            # For fixed data format, use an implicit range starting from
            # 1 to take into account that leading and trailing blanks
            # might be missing from the rule parts.
            assert self.length.lower_limit == self.length.upper_limit
            length = ranges.Range('1...%d' % self.length.upper_limit)
        length_range = ranges.create_range_from_length(length)

    has_rule = (rule is not None) and (rule.strip() != '')
    if has_rule:
        rule_range = ranges.Range(rule)

    if has_length and has_rule:
        # Both a length and a rule have been specified: check if all
        # non ``None`` parts of each item of the rule fit within the
        # range of the length. Then use the rule as valid range.
        for rule_item in rule_range.items:
            for limit in rule_item:
                if limit is None:
                    continue
                digit_count = _tools.length_of_int(limit)
                try:
                    length.validate("length of partial rule limit '%d'" % limit, digit_count)
                except errors.RangeValueError as error:
                    raise errors.InterfaceError("length must be consistent with rule: %s" % error)
        self.valid_range = rule_range
    elif has_length:
        # A length but no rule has been specified: derive a valid
        # range from the length.
        self.valid_range = length_range
    elif has_rule:
        # No length but a rule has been specified: use the rule as
        # valid range.
        self.valid_range = rule_range
    else:
        # No length and no rule has been specified: use a default
        # range of signed 32 bit integer. If the user wants a bigger
        # range, he has to specify it. Python's ``int`` scales to any
        # range as long as there is enough memory available to
        # represent it.
        self.valid_range = ranges.Range(ranges.DEFAULT_INTEGER_RANGE_TEXT)
def test_fails_on_create_range_from_negative_length(self):
    """Lengths that include values below 1 must be rejected with ``RangeValueError``."""
    for length_description in ("-19...-1", "-19...1", "-1...0", "0...0"):
        self.assertRaises(
            errors.RangeValueError,
            ranges.create_range_from_length,
            ranges.Range(length_description))
def test_can_parse_proper_ranges(self):
    """Check ``items`` for single values and for ranges using ``...``, the ellipsis character and ``:``."""
    description_and_expected_items = [
        ("1", [(1, 1)]),
        ("1...", [(1, None)]),
        ("...1", [(None, 1)]),
        ("1" + "\u2026" + "2", [(1, 2)]),
        ("-1...2", [(-1, 2)]),
        ("1:", [(1, None)]),
        (":1", [(None, 1)]),
        ("1:2", [(1, 2)]),
        ("-1:2", [(-1, 2)]),
    ]
    for description, expected_items in description_and_expected_items:
        self.assertEqual(ranges.Range(description).items, expected_items)
def test_can_read_field_type_text_field(self):
    """Read a minimal inline CID and check name and length of its only field."""
    cid_rows = [
        ['d', 'format', 'delimited'],
        ['f', 'branch_id', '38000', '', '5'],
    ]
    cid_reader = interface.Cid()
    cid_reader.read('inline', cid_rows)

    self.assertEqual(cid_reader.field_names[0], 'branch_id')
    actual_length = cid_reader.field_formats[0].length
    self.assertEqual(actual_length.description, ranges.Range('5').description)
def test_can_validate_with_lower_and_upper_limit(self):
    """A range with both limits accepts values within and rejects values beyond them."""
    lower_and_upper_range = ranges.Range("-1...1")
    for accepted_value in (-1, 0, 1):
        lower_and_upper_range.validate("x", accepted_value)
    for rejected_value in (-2, 2):
        self.assertRaises(
            errors.RangeValueError, lower_and_upper_range.validate, "x", rejected_value)
def test_can_get_lower_limit(self):
    """
    ``lower_limit`` is the smallest lower limit of all items, or ``None``
    if any item has no lower limit.
    """
    # The "5...9" case used to be asserted twice; each case now appears once.
    description_and_expected_lower_limit = [
        ("5...9", 5),
        ("0...", 0),
        ("...0", None),
        ("...1, 3...", None),
        ("1...2, 5...9", 1),
        ("5...9, 1...2", 1),
    ]
    for description, expected_lower_limit in description_and_expected_lower_limit:
        self.assertEqual(ranges.Range(description).lower_limit, expected_lower_limit)
def __init__(self, description, rule, available_field_names, location=None):
    """
    Set up the check, parse ``rule`` as a range stored in
    ``self._full_name_range`` and call ``reset()``.
    """
    super(FullNameLengthIsInRangeCheck, self).__init__(
        description, rule, available_field_names, location)
    full_name_range = ranges.Range(rule)
    self._full_name_range = full_name_range
    self.reset()
def _test_can_handle_empty_range(self, description):
    """
    Check that the range built from ``description`` has no items and no
    limits, and that ``validate()`` returns ``None`` for any value.
    """
    empty_range = ranges.Range(description)
    for attribute_name in ('items', 'lower_limit', 'upper_limit'):
        self.assertEqual(getattr(empty_range, attribute_name), None)
    for value in (ranges.MIN_INTEGER - 1, 1, 0, -1, ranges.MAX_INTEGER + 1):
        self.assertIsNone(empty_range.validate("x", value))
def test_can_validate_with_multi_range(self):
    """A range with several items accepts values inside any item and rejects all others."""
    multi_range = ranges.Range("1...4, 7...9")
    for accepted_value in (1, 7, 9):
        multi_range.validate("x", accepted_value)
    for rejected_value in (-3, 0, 5, 6, 10, 723):
        self.assertRaises(errors.RangeValueError, multi_range.validate, "x", rejected_value)
def __init__(self, field_name, is_allowed_to_be_empty, length_text, rule, data_format, empty_value=None):
    """
    Set up the integer field format and derive ``self.valid_range`` from
    the declared length and rule.

    With a length, the range of values that fit the length is computed
    using ``ranges.create_range_from_length()``. If a rule is specified
    too, its overall limits must lie within that range and the rule
    becomes the valid range. Without a length, the rule (or, failing
    that, ``ranges.DEFAULT_INTEGER_RANGE_TEXT``) is used.

    :raises errors.FieldValueError: if the limits of the rule exceed the \
        range derived from the length
    """
    super(IntegerFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length_text, rule, data_format, empty_value)

    # The default range is 32 bit. If the user wants a bigger range, he
    # has to specify it. Python's ``int`` scales to any range as long as
    # there is enough memory available to represent it.
    has_length = (length_text is not None) and (length_text.strip() != '')
    has_rule = (rule is not None) and (rule.strip() != '')

    if has_length:
        length = self._length
        if data_format.format == data.FORMAT_FIXED and self._length.lower_limit == self._length.upper_limit:
            # Fixed format with an exact length: accept any number of
            # digits from 1 up to that length.
            length = ranges.Range('1...%d' % self._length.upper_limit)
        length_range = ranges.create_range_from_length(length)

        # NOTE(review): a derived range starting at 1 switches off
        # "allowed to be empty"; the reason is not visible here - confirm.
        if length_range.lower_limit == 1:
            self._is_allowed_to_be_empty = False

        if has_rule:
            rule_range = ranges.Range(rule)

            if length_range.upper_limit is not None and rule_range.upper_limit is not None \
                    and length_range.upper_limit < rule_range.upper_limit:
                raise errors.FieldValueError('length upper limit must be greater than the rule upper limit')

            if length_range.lower_limit is not None and rule_range.lower_limit is not None \
                    and length_range.lower_limit > rule_range.lower_limit:
                # The previous message stated the opposite of the
                # condition checked here: the error is raised when the
                # rule starts *below* what the length permits.
                raise errors.FieldValueError(
                    'rule lower limit must be greater than or equal to the length lower limit')

            self.valid_range = rule_range
        else:
            self.valid_range = length_range
    else:
        if has_rule:
            self.valid_range = ranges.Range(rule)
        else:
            self.valid_range = ranges.Range(ranges.DEFAULT_INTEGER_RANGE_TEXT)
def __init__(self, field_name, is_allowed_to_be_empty, length_text, rule, data_format, empty_value=None):
    """
    Store the declared properties of a field and parse ``length_text``
    into a :py:class:`ranges.Range` kept in ``self._length``.

    ``rule`` must not be ``None``; use ``''`` to specify "no rule".
    """
    # Preconditions.
    assert field_name is not None
    assert field_name, 'field_name must not be empty'
    assert is_allowed_to_be_empty in (False, True), \
        'is_allowed_to_be_empty=%r' % is_allowed_to_be_empty
    assert rule is not None, 'to specify "no rule" use "" instead of None'
    assert data_format is not None

    # TODO #82: Cleanup validation for declared field formats.
    self._field_name = field_name
    self._is_allowed_to_be_empty = is_allowed_to_be_empty
    declared_length = ranges.Range(length_text)
    self._length = declared_length
    self._rule = rule
    self._data_format = data_format
    self._empty_value = empty_value
    self._example = None
def as_sql_number(field_name, field_is_allowed_to_be_empty, field_length, field_rule, range_rule, db):
    """
    Build the SQL column definition and check constraint for an integer
    field.

    :param field_name: name of the column
    :param field_is_allowed_to_be_empty: if false, ``" not null"`` is \
        appended to the column definition
    :param field_length: range describing the length; its ``description`` \
        and ``items`` are read when ``field_rule`` is empty
    :param field_rule: rule text of the field; ``''`` means "no rule"
    :param range_rule: range of valid values; if ``None`` it is built from \
        ``field_rule`` with ``ranges.DEFAULT_INTEGER_RANGE_TEXT`` as default
    :param db: database dialect; ``bigint`` is only used for ``MSSQL`` and \
        ``DB2``
    :return: list ``[column_def, constraint]``; ``constraint`` is ``''`` if \
        ``range_rule`` has no items
    """
    if range_rule is None:
        range_rule = ranges.Range(field_rule, ranges.DEFAULT_INTEGER_RANGE_TEXT)

    column_def = ""

    # Get the highest integer the column has to hold.
    # NOTE(review): an item without upper limit (``None``) is not handled
    # here; on Python 3 ``max()`` or the comparisons below raise a TypeError.
    if (field_rule == '') and (field_length.description is not None):
        range_limit = 10 ** max([item[1] for item in field_length.items])
    else:
        range_limit = max([rule[1] for rule in range_rule.items])

    if range_limit <= MAX_SMALLINT:
        column_def = field_name + " smallint"
    elif range_limit <= MAX_INTEGER:
        column_def = field_name + " integer"
    elif db in (MSSQL, DB2) and range_limit <= MAX_BIGINT:
        column_def = field_name + " bigint"
    # TODO: Otherwise fall back to a decimal column; until then the type
    # part of ``column_def`` remains empty (same as before).

    if not field_is_allowed_to_be_empty:
        column_def += " not null"

    # Each item of the range gets its own clause using its own limits.
    # Previously every clause repeated the overall lower and upper limit
    # of the whole range, so "1...2, 5...9" wrongly allowed 3 and 4.
    clauses = [
        _sql_range_clause(field_name, lower_limit, upper_limit)
        for lower_limit, upper_limit in range_rule.items
    ]
    if clauses:
        constraint = "constraint chk_" + field_name + " check( " + " or ".join(clauses) + " )"
    else:
        constraint = ""

    return [column_def, constraint]


def _sql_range_clause(field_name, lower_limit, upper_limit):
    """
    Return the parenthesized SQL condition that limits ``field_name`` to a
    single range item; ``None`` as limit means "open" on that side.
    """
    # Assumes at least one limit is set - TODO confirm a range item can
    # never be ``(None, None)``.
    if lower_limit is None:
        condition = field_name + " <= " + str(upper_limit)
    elif upper_limit is None:
        condition = field_name + " >= " + str(lower_limit)
    else:
        condition = field_name + " between " + str(lower_limit) + " and " + str(upper_limit)
    return "( " + condition + " )"
def test_can_override_default_range(self):
    """A non-empty description takes precedence over the default passed as second argument."""
    range_with_default = ranges.Range("1...2", "2...3")
    self.assertEqual(range_with_default.items, [(1, 2)])
def test_can_use_default_range(self):
    """An empty description falls back to the default passed as second argument."""
    range_from_default = ranges.Range("", "2...3")
    self.assertEqual(range_from_default.items, [(2, 3)])
def test_can_parse_text_range(self):
    """A quoted character is parsed to its code point (``"a"`` becomes 97)."""
    text_range = ranges.Range("\"a\"")
    self.assertEqual(text_range.items, [(97, 97)])
def test_can_parse_symbolic_range(self):
    """Symbolic names such as ``TAB`` and ``vt`` are parsed regardless of case."""
    description_and_expected_items = [
        ("TAB", [(9, 9)]),
        ("vt", [(11, 11)]),
        ("Tab...Vt", [(9, 11)]),
        ("Tab...11", [(9, 11)]),
    ]
    for description, expected_items in description_and_expected_items:
        self.assertEqual(ranges.Range(description).items, expected_items)
def test_can_parse_multiple_ranges(self):
    """Comma separated parts are parsed to one item each."""
    description_and_expected_items = [
        ("1, 3", [(1, 1), (3, 3)]),
        ("1...2, 5...", [(1, 2), (5, None)]),
    ]
    for description, expected_items in description_and_expected_items:
        self.assertEqual(ranges.Range(description).items, expected_items)
def test_can_parse_hex_range(self):
    """Hexadecimal numbers are parsed with lower and upper case digits."""
    for hex_description in ("0x7f", "0x7F"):
        self.assertEqual(ranges.Range(hex_description).items, [(127, 127)])
def set_property(self, name, value, location=None):
    r"""
    Set data format property ``name`` to ``value`` possibly translating
    ``value`` from a human readable representation to an internal one.

    :param str name: any of the ``KEY_*`` constants
    :param value: the value to set the property to as it would show up in a CID. \
        In some cases, the value will be translated to an internal representation. \
        For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
        :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
    :type value: str or None
    :param location: passed on to any :py:exc:`cutplace.errors.InterfaceError` \
        raised
    :raises cutplace.errors.InterfaceError: if ``name`` is not a valid
        property name for this data format
    :raises cutplace.errors.InterfaceError: if ``value`` is invalid for
        the specified property
    """
    assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
    assert name is not None
    assert name == name.lower(), 'property name must be lower case: %r' % name
    assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

    name = name.replace(' ', '_')
    # A property is valid for this format if the instance has an
    # attribute with the same name prefixed by an underscore.
    property_attribute_name = '_' + name
    if property_attribute_name not in self.__dict__:
        valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
        raise errors.InterfaceError(
            _('data format property %s for format %s is %s but must be one of %s')
            % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names),
            location)

    if name == KEY_ENCODING:
        try:
            codecs.lookup(value)
        except LookupError:
            # Report the rejected ``value``; ``self.encoding`` still holds
            # the previous, valid encoding and would be misleading here.
            raise errors.InterfaceError(
                _('value for data format property %s is %s but must be a valid encoding')
                % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(value)),
                location)
        self.encoding = value
    elif name == KEY_HEADER:
        self.header = DataFormat._validated_int_at_least_0(name, value, location)
    elif name == KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES:
        self.validate_header_row_against_field_names = DataFormat._validated_bool(
            KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES, value, location)
    elif name == KEY_ALLOWED_CHARACTERS:
        try:
            self._allowed_characters = ranges.Range(value)
        except errors.InterfaceError as error:
            raise errors.InterfaceError(
                _('data format property %s must be a valid range: %s')
                % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error),
                location)
    elif name == KEY_DECIMAL_SEPARATOR:
        self.decimal_separator = DataFormat._validated_choice(
            KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
    elif name == KEY_ESCAPE_CHARACTER:
        self.escape_character = DataFormat._validated_choice(
            KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
    elif name == KEY_ITEM_DELIMITER:
        item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
        if item_delimiter == '\x00':
            raise errors.InterfaceError(
                _("data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)")
                % _compat.text_repr(KEY_ITEM_DELIMITER),
                location)
        self.item_delimiter = item_delimiter
    elif name == KEY_LINE_DELIMITER:
        # NOTE(review): the assertion above permits ``value`` to be None
        # for this property, but ``value.lower()`` would then raise an
        # AttributeError - confirm how None is meant to be handled.
        try:
            self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
        except KeyError:
            raise errors.InterfaceError(
                _('line delimiter %s must be changed to one of: %s')
                % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                location)
    elif name == KEY_QUOTE_CHARACTER:
        self.quote_character = DataFormat._validated_choice(
            KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
    elif name == KEY_SHEET:
        self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
    elif name == KEY_SKIP_INITIAL_SPACE:
        self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
    elif name == KEY_THOUSANDS_SEPARATOR:
        self.thousands_separator = DataFormat._validated_choice(
            KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
    elif name == KEY_QUOTING:
        result = DataFormat._validated_choice(KEY_QUOTING, value, _VALID_QUOTING, location, ignore_case=True)
        self.quoting = READABLE_TO_CSV_QUOTING_FORMAT[result]
    elif name == KEY_STRICT_FIELD_NAMES:
        self.strict_field_names = DataFormat._validated_bool(KEY_STRICT_FIELD_NAMES, value, location)
    else:
        # Every attribute-backed property must be handled above.
        assert False, 'name=%r' % name
def _test_fails_with_interface_error(self, description, anticipated_error_message_pattern):
    """
    Check that building a range from ``description`` raises an
    ``InterfaceError`` whose message matches
    ``anticipated_error_message_pattern``.
    """
    try:
        ranges.Range(description)
    except errors.InterfaceError as anticipated_error:
        actual_message = str(anticipated_error)
        dev_test.assert_fnmatches(self, actual_message, anticipated_error_message_pattern)
    else:
        self.fail("test must fail with InterfaceError")
def test_can_create_range_from_length(self):
    """Check the range of values derived from various length descriptions."""
    length_and_expected_items = [
        ("1...", None),
        ("1...1", [(0, 9)]),
        ("1...3", [(-99, 999)]),
        ("0...", None),
        ("0...1", [(0, 9)]),
        ("0...3", [(-99, 999)]),
        ("", None),
        ("...1", [(0, 9)]),
        ("...3", [(-99, 999)]),
        ("2...2", [(-9, -1), (10, 99)]),
        ("2...5", [(-9999, -1), (10, 99999)]),
        ("3...8", [(-9999999, -10), (100, 99999999)]),
        ("2...", [(None, -1), (10, None)]),
        ("3...", [(None, -10), (100, None)]),
        ("3...4, 10...", [(-999, -10), (100, 9999), (None, -100000000), (1000000000, None)]),
    ]
    for length_description, expected_items in length_and_expected_items:
        value_range = ranges.create_range_from_length(ranges.Range(length_description))
        self.assertEqual(value_range.items, expected_items)
def test_can_display_multi_range(self):
    """The text representation of a range with several items lists them comma separated."""
    multi_range = ranges.Range("9...10, 11...13")
    displayed_text = six.text_type(multi_range)
    self.assertEqual(displayed_text, "9...10, 11...13")
def test_can_validate_with_upper_limit_only(self):
    """A range with only an upper limit accepts anything up to it and rejects bigger values."""
    upper_range = ranges.Range("...1")
    for accepted_value in (1, -2, -(2 ** 32) - 1):
        upper_range.validate("x", accepted_value)
    self.assertRaises(errors.RangeValueError, upper_range.validate, "x", 2)
def test_can_validate_with_lower_limit_only(self):
    """A range with only a lower limit accepts anything from it upwards and rejects smaller values."""
    lower_range = ranges.Range("1...")
    for accepted_value in (1, 2, 2 ** 32):
        lower_range.validate("x", accepted_value)
    self.assertRaises(errors.RangeValueError, lower_range.validate, "x", 0)
def test_can_set_allowed_characters(self):
    """Setting the allowed characters stores the parsed range in the data format."""
    delimited_format = data.DataFormat(data.FORMAT_DELIMITED)
    delimited_format.set_property(data.KEY_ALLOWED_CHARACTERS, '3...5')
    # Check the range stored by ``set_property()``. The previous assertion
    # compared against a freshly built ``Range('3...5')`` and therefore
    # passed no matter what ``set_property()`` did.
    # NOTE(review): switch to a public accessor if the data format offers
    # one; only the ``_allowed_characters`` attribute is known here.
    self.assertEqual([(3, 5)], delimited_format._allowed_characters.items)
def test_can_display_lower_is_upper_length(self):
    """A range with identical lower and upper limit is displayed as a single number."""
    single_value_range = ranges.Range("9...9")
    displayed_text = six.text_type(single_value_range)
    self.assertEqual(displayed_text, "9")
def test_can_get_upper_limit(self):
    """
    ``upper_limit`` is the biggest upper limit of all items, or ``None``
    if any item has no upper limit.
    """
    description_and_expected_upper_limit = [
        ("1...2", 2),
        ("0...", None),
        ("...0", 0),
        ("...1, 3...", None),
        ("1...2, 5...9", 9),
    ]
    for description, expected_upper_limit in description_and_expected_upper_limit:
        self.assertEqual(ranges.Range(description).upper_limit, expected_upper_limit)
def test_can_represent_range_containing_none(self):
    """
    A range built from ``None`` and a range with ``None`` appended to its
    items can both be converted to text.
    """
    none_range = ranges.Range(None)
    expected_none_text = six.text_type(None)
    self.assertEqual(six.text_type(none_range), expected_none_text)

    upper_none_range = ranges.Range("1...")
    # Deliberately add an item that is ``None`` instead of a tuple.
    upper_none_range.items.append(None)
    displayed_text = six.text_type(upper_none_range)
    self.assertEqual(displayed_text, "1..., None")