Beispiel #1
0
    def validated_value(self, value):
        """
        Same as ``value`` provided that it is one of ``self.choices``.

        :raises errors.FieldValueError: if ``value`` is not one of the choices
        """
        assert value

        # Guard clause: a known choice is passed through unchanged.
        if value in self.choices:
            return value
        raise errors.FieldValueError(
            "value is %s but must be one of: %s"
            % (_compat.text_repr(value), _tools.human_readable_list(self.choices)))
Beispiel #2
0
    def validated_value(self, value):
        assert value

        if value not in self.choices:
            raise errors.FieldValueError(
                "value is %s but must be one of: %s"
                % (_compat.text_repr(value), _tools.human_readable_list(self.choices)))
        return value
Beispiel #3
0
    def field_index(self, field_name):
        """
        The 0 based column index of the field named ``field_name``.

        :raises AssertionError: if ``field_name`` is not a known field name
        """
        index_map = self._field_name_to_index_map
        assert field_name in index_map, \
            "unknown field name '%s' must be replaced by one of: %s" \
            % (field_name, _tools.human_readable_list(sorted(self.field_names)))

        return index_map[field_name]
Beispiel #4
0
    def field_index(self, field_name):
        """
        Position of the field ``field_name`` within the row, where the first
        field has the index 0.

        :raises AssertionError: if ``field_name`` is unknown
        """
        is_known_field = field_name in self._field_name_to_index_map
        assert is_known_field, \
            "unknown field name '%s' must be replaced by one of: %s" \
            % (field_name, _tools.human_readable_list(sorted(self.field_names)))

        result = self._field_name_to_index_map[field_name]
        return result
Beispiel #5
0
    def add_check_row(self, possibly_incomplete_items):
        """
        Add a check as declared in ``possibly_incomplete_items``, which
        ideally is a list composed of 3 elements:

        1. description ('customer_id_must_be_unique')
        2. type (e.g. 'IsUnique'  mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
        3. rule (e.g. 'customer_id')

        Missing items are interpreted as empty string (``''``), additional
        items are ignored.

        :raises cutplace.errors.InterfaceError: on broken \
          ``possibly_incomplete_items``, i.e. a missing description, an \
          unknown check type or a description that has already been used
        """
        assert possibly_incomplete_items is not None

        items = list(possibly_incomplete_items)
        # HACK: Ignore possible concatenated (empty) cells between description and type.
        while (len(items) >= 2) and (items[1].strip() == ''):
            del items[1]

        # Pad with empty strings so that unpacking works for short rows.
        check_description, check_type, check_rule = (items + 3 * [''])[:3]
        self._location.advance_cell()
        if check_description == '':
            raise errors.InterfaceError('check description must be specified',
                                        self._location)
        self._location.advance_cell()
        check_class_name = check_type + "Check"
        if check_class_name not in self._check_name_to_class_map:
            list_of_available_check_types = _tools.human_readable_list(
                sorted(self._check_name_to_class_map.keys()))
            raise errors.InterfaceError(
                "check type is '%s' but must be one of: %s" %
                (check_type, list_of_available_check_types), self._location)
        _log.debug("create check: %s(%r, %r)", check_type, check_description,
                   check_rule)
        check_class = self._create_check_class(check_type)
        # Create the check by calling the class instead of invoking
        # __new__() and __init__() by hand.
        check = check_class(check_description, check_rule, self._field_names,
                            self._location)
        # Point to the description cell for a possible "duplicate" error.
        self._location.set_cell(1)
        existing_check = self._check_name_to_check_map.get(check_description)
        if existing_check is not None:
            raise errors.InterfaceError(
                "check description must be used only once: %s" %
                _compat.text_repr(check_description), self._location,
                "first declaration", existing_check.location)
        self._check_name_to_check_map[check_description] = check
        self._check_names.append(check_description)
        assert len(self.check_names) == len(self._check_name_to_check_map)
Beispiel #6
0
    def _create_class(self, name_to_class_map, class_qualifier, class_name_appendix, type_name):
        """
        The class from ``name_to_class_map`` stored under the name built from
        the last dot separated part of ``class_qualifier`` followed by
        ``class_name_appendix``.

        :raises errors.InterfaceError: if ``name_to_class_map`` has no \
          class for the resulting name; ``type_name`` is used in the error \
          message
        """
        assert name_to_class_map
        assert class_qualifier
        assert class_name_appendix
        assert type_name

        # Only the part after the last dot is relevant for the lookup.
        unqualified_name = class_qualifier.rsplit(".", 1)[-1]
        class_name = unqualified_name + class_name_appendix
        result = name_to_class_map.get(class_name)
        if result is not None:
            return result
        available_class_names = _tools.human_readable_list(sorted(name_to_class_map.keys()))
        raise errors.InterfaceError(
            "cannot find class for %s %s: related class is %s but must be one of: %s" % (
                type_name, class_qualifier, class_name, available_class_names),
            self._location)
Beispiel #7
0
    def _create_class(self, name_to_class_map, class_qualifier, class_name_appendix, type_name):
        """
        Look up a class in ``name_to_class_map``. The name to look up consists
        of the text after the last ``.`` in ``class_qualifier`` and the
        appended ``class_name_appendix``.

        :raises errors.InterfaceError: if no class is stored under that \
          name; the message refers to the kind of class as ``type_name``
        """
        assert name_to_class_map
        assert class_qualifier
        assert class_name_appendix
        assert type_name

        qualifier_parts = class_qualifier.split(".")
        class_name = "%s%s" % (qualifier_parts[-1], class_name_appendix) \
            if False else qualifier_parts[-1] + class_name_appendix
        result = name_to_class_map.get(class_name)
        class_is_missing = result is None
        if class_is_missing:
            message = "cannot find class for %s %s: related class is %s but must be one of: %s" % (
                type_name, class_qualifier, class_name,
                _tools.human_readable_list(sorted(name_to_class_map.keys())))
            raise errors.InterfaceError(message, self._location)
        return result
Beispiel #8
0
def main(arguments):
    """
    Convert an ODS file to CSV or RST as described by the command line
    options in ``arguments``.

    Invalid options are reported using the argument parser's ``error()``.
    If the conversion fails, the error is logged and the process exits with
    exit code 1.
    """
    assert arguments is not None

    format_csv = "csv"
    format_rst = "rst"
    available_formats = [format_csv, format_rst]
    default_format = format_csv
    default_sheet = 1

    format_help = "target format: %s (default: %s)" % (
        _tools.human_readable_list(available_formats), default_format)
    sheet_help = "sheet to convert (default: %d)" % default_sheet

    parser = argparse.ArgumentParser(description='convert ODS file to other formats')
    parser.add_argument(
        "-f", "--format", metavar="FORMAT", default=default_format, choices=sorted(available_formats),
        dest="format", help=format_help)
    parser.add_argument(
        "-1", "--heading", action="store_true", dest="firstRowIsHeading", help="render first row as heading")
    parser.add_argument(
        "-s", "--sheet", metavar="SHEET", default=default_sheet, type=int, dest="sheet", help=sheet_help)
    parser.add_argument('source_ods_path', metavar='ODS-FILE', help='the ODS file to convert')
    parser.add_argument('target_path', metavar='TARGET-FILE', nargs='?', help='the target file to write')
    options = parser.parse_args(arguments)

    # Validate combinations of options the parser cannot check by itself.
    if options.sheet < 1:
        parser.error("option --sheet is %d but must be at least 1" % options.sheet)
    if options.firstRowIsHeading and (options.format == format_csv):
        parser.error("option --heading can not be used with --format=csv")

    source_path = options.source_ods_path
    target_path = options.target_path
    if target_path is None:
        # Derive the target from the source by replacing the suffix with the format.
        assert options.format in available_formats
        target_path = _tools.with_suffix(source_path, '.' + options.format)

    _log.info("convert %r to %r using format %r", source_path, target_path, options.format)
    try:
        if options.format == format_csv:
            to_csv(source_path, target_path, sheet=options.sheet)
        elif options.format == format_rst:
            to_rst(source_path, target_path, first_row_is_heading=options.firstRowIsHeading, sheet=options.sheet)
        else:  # pragma: no cover
            raise NotImplementedError("format=%r" % options.format)
    except (EnvironmentError, OSError) as error:
        # Expected kind of failure: a short message is sufficient.
        _log.error("cannot convert ods: %s", error)
        sys.exit(1)
    except Exception as error:
        # Unexpected failure: include the stack trace in the log.
        _log.exception("cannot convert ods: %s", error)
        sys.exit(1)
Beispiel #9
0
    def add_check_row(self, possibly_incomplete_items):
        """
        Add a check as declared in ``possibly_incomplete_items``, which
        ideally is a list composed of 3 elements:

        1. description ('customer_id_must_be_unique')
        2. type (e.g. 'IsUnique'  mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
        3. rule (e.g. 'customer_id')

        Missing items are interpreted as empty string (``''``), additional
        items are ignored.

        :raises cutplace.errors.InterfaceError: on broken \
          ``possibly_incomplete_items``, i.e. a missing description, an \
          unknown check type or a description that has already been used
        """
        assert possibly_incomplete_items is not None

        items = list(possibly_incomplete_items)
        # HACK: Ignore possible concatenated (empty) cells between description and type.
        while (len(items) >= 2) and (items[1].strip() == ''):
            del items[1]

        # Pad with empty strings so that unpacking works for short rows.
        check_description, check_type, check_rule = (items + 3 * [''])[:3]
        self._location.advance_cell()
        if check_description == '':
            raise errors.InterfaceError(
                'check description must be specified', self._location)
        self._location.advance_cell()
        check_class_name = check_type + "Check"
        if check_class_name not in self._check_name_to_class_map:
            list_of_available_check_types = _tools.human_readable_list(sorted(self._check_name_to_class_map.keys()))
            raise errors.InterfaceError(
                "check type is '%s' but must be one of: %s"
                % (check_type, list_of_available_check_types),
                self._location)
        _log.debug("create check: %s(%r, %r)", check_type, check_description, check_rule)
        check_class = self._create_check_class(check_type)
        # Create the check by calling the class instead of invoking
        # __new__() and __init__() by hand.
        check = check_class(check_description, check_rule, self._field_names, self._location)
        # Point to the description cell for a possible "duplicate" error.
        self._location.set_cell(1)
        existing_check = self._check_name_to_check_map.get(check_description)
        if existing_check is not None:
            raise errors.InterfaceError(
                "check description must be used only once: %s" % _compat.text_repr(check_description),
                self._location, "first declaration", existing_check.location)
        self._check_name_to_check_map[check_description] = check
        self._check_names.append(check_description)
        assert len(self.check_names) == len(self._check_name_to_check_map)
Beispiel #10
0
    def _validated_choice(key, value, choices, location, ignore_case=False):
        """
        Same as ``value`` or ``value.lower()`` in case ``ignore_case`` is set
        to ``True``. If the supposed result is not one of the available
        ``choices``, raise `errors.InterfaceError` referring to ``location``.
        """
        assert key
        assert value is not None
        assert choices

        if ignore_case:
            result = value.lower()
        else:
            result = value
        if result in choices:
            return result
        # The message shows the original value, not the lower case variant.
        raise errors.InterfaceError(
            'data format property %s is %s but must be one of: %s'
            % (_compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices)), location)
Beispiel #11
0
    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as specified
        by `line_delimiter` and read and validate them.

        The result is ``False`` if the end of the input was reached while
        reading the line delimiter, otherwise ``True``. In case
        `line_delimiter` is `None`, the result is always ``True`` even if
        the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        if line_delimiter is None:
            # Rows follow each other without any delimiter, nothing to skip.
            return True

        if line_delimiter == '\r\n':
            character_count_to_read = 2
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            character_count_to_read = 1
        found_line_delimiter = fixed_file.read(character_count_to_read)
        if found_line_delimiter == '':
            # End of input reached.
            return False

        if line_delimiter != 'any':
            # A specific line delimiter has to match exactly.
            if found_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s' %
                    (_compat.text_repr(found_line_delimiter),
                     _compat.text_repr(line_delimiter)), location)
            return True

        has_more_data = True
        if found_line_delimiter == '\r':
            # Process the optional '\n' for 'any'.
            character_after_return = fixed_file.read(1)
            if character_after_return == '\n':
                found_line_delimiter += character_after_return
            elif character_after_return == '':
                has_more_data = False
            else:
                # Unread the previous character because it is unrelated to line delimiters.
                unread_character_after_line_delimiter[0] = character_after_return
        if found_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
            valid_line_delimiters = _tools.human_readable_list(
                _VALID_FIXED_ANY_LINE_DELIMITERS)
            raise errors.DataFormatError(
                'line delimiter is %s but must be one of: %s' %
                (_compat.text_repr(found_line_delimiter),
                 valid_line_delimiters), location)
        return has_more_data
Beispiel #12
0
    def _validated_choice(key, value, choices, location, ignore_case=False):
        """
        The ``value`` (converted to lower case if ``ignore_case`` is
        ``True``) provided that it is one of the available ``choices``.
        Otherwise raise `errors.InterfaceError` referring to ``location``.
        """
        assert key
        assert value is not None
        assert choices

        result = value.lower() if ignore_case else value
        is_valid_choice = result in choices
        if not is_valid_choice:
            # The message shows the original value, not the lower case variant.
            message = _('data format property %s is %s but must be one of: %s') % (
                _compat.text_repr(key), _compat.text_repr(value),
                _tools.human_readable_list(choices))
            raise errors.InterfaceError(message, location)
        return result
Beispiel #13
0
    def _has_data_after_skipped_line_delimiter():
        """
        Read the line delimiter expected at the current position of
        `fixed_file` and validate it against `line_delimiter`.

        The result is ``False`` if the end of the input was reached while
        reading the line delimiter, otherwise ``True``. In case
        `line_delimiter` is `None`, nothing is read and the result is always
        ``True`` even if the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        if line_delimiter is None:
            return True

        if line_delimiter == '\r\n':
            delimiter_length = 2
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            delimiter_length = 1
        delimiter_read = fixed_file.read(delimiter_length)
        if delimiter_read == '':
            # Nothing left to read, so there is no further row either.
            return False

        if line_delimiter == 'any':
            input_continues = True
            if delimiter_read == '\r':
                # Process the optional '\n' for 'any'.
                possible_linefeed = fixed_file.read(1)
                if possible_linefeed == '\n':
                    delimiter_read += possible_linefeed
                elif possible_linefeed == '':
                    input_continues = False
                else:
                    # Unread the previous character because it is unrelated to line delimiters.
                    unread_character_after_line_delimiter[0] = possible_linefeed
            if delimiter_read not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                raise errors.DataFormatError(
                    'line delimiter is %s but must be one of: %s' %
                    (_compat.text_repr(delimiter_read), valid_line_delimiters), location)
            return input_continues

        if delimiter_read != line_delimiter:
            raise errors.DataFormatError(
                'line delimiter is %s but must be %s'
                % (_compat.text_repr(delimiter_read), _compat.text_repr(line_delimiter)), location)
        return True
Beispiel #14
0
    def field_value_for(self, field_name, row):
        """
        The item of ``row`` at the index of the field named ``field_name``.

        :param list row: the row to obtain the :py:class:`str` value for \
          ``field_name`` from

        :raises AssertionError: if ``field_name`` is not part of the CID
        :raises AssertionError: if ``row`` does not have the expected number of items
        """
        index_map = self._field_name_to_index_map
        assert field_name in index_map, \
            "unknown field name %r must be replaced by one of: %s" \
            % (field_name, _tools.human_readable_list(sorted(self.field_names)))
        assert row is not None
        # A row must provide exactly one item for each field.
        actual_row_count, expected_row_count = len(row), len(self.field_names)
        assert actual_row_count == expected_row_count, \
            "row must have %d items but has %d: %s" % (expected_row_count, actual_row_count, row)

        field_position = index_map[field_name]
        return row[field_position]
Beispiel #15
0
def code_for_symbolic_token(name, value, location):
    """
    The numeric code for the text representing a symbolic name in ``value``,
    which (ignoring case) has to be one of the keys in
    :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a symbolic name
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if ``value`` is not a known \
      symbolic name
    """
    assert name is not None
    assert value is not None

    symbol_key = value.lower()
    try:
        code = errors.NAME_TO_ASCII_CODE_MAP[symbol_key]
    except KeyError:
        valid_symbols = _tools.human_readable_list(sorted(errors.NAME_TO_ASCII_CODE_MAP.keys()))
        message = 'symbolic name %s for %s must be one of: %s' % (_compat.text_repr(value), name, valid_symbols)
        raise errors.InterfaceError(message, location)
    else:
        return code
Beispiel #16
0
    def field_value_for(self, field_name, row):
        """
        Look up the value of the field ``field_name`` in ``row``.

        :param list row: the row to obtain the :py:class:`str` value for \
          ``field_name`` from

        :raises AssertionError: if ``field_name`` is not part of the CID
        :raises AssertionError: if ``row`` does not have the expected number of items
        """
        is_known_field = field_name in self._field_name_to_index_map
        assert is_known_field, \
            "unknown field name %r must be replaced by one of: %s" \
            % (field_name, _tools.human_readable_list(sorted(self.field_names)))
        assert row is not None
        actual_row_count = len(row)
        expected_row_count = len(self.field_names)
        has_expected_size = actual_row_count == expected_row_count
        assert has_expected_size, \
            "row must have %d items but has %d: %s" % (expected_row_count, actual_row_count, row)

        index_of_field = self._field_name_to_index_map[field_name]
        return row[index_of_field]
Beispiel #17
0
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    The index of ``field_name_to_look_up`` (without leading or trailing
    white space) in ``available_field_names``.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is \
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    stripped_field_name = field_name_to_look_up.strip()
    try:
        return available_field_names.index(stripped_field_name)
    except ValueError:
        list_of_available_names = _tools.human_readable_list(available_field_names)
        raise errors.InterfaceError(
            'unknown field name %s must be replaced by one of: %s' %
            (_compat.text_repr(stripped_field_name), list_of_available_names),
            location)
Beispiel #18
0
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    Position of ``field_name_to_look_up`` (which must not have leading or
    trailing white space) within ``available_field_names``.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is \
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    name_to_find = field_name_to_look_up.strip()
    try:
        result = available_field_names.index(name_to_find)
    except ValueError:
        message = 'unknown field name %s must be replaced by one of: %s' % (
            _compat.text_repr(name_to_find), _tools.human_readable_list(available_field_names))
        raise errors.InterfaceError(message, location)
    else:
        return result
Beispiel #19
0
def fixed_rows(fixed_source, encoding, field_name_and_lengths, line_delimiter='any'):
    r"""
    Rows found in file ``fixed_source`` using ``encoding``. The name and
    (fixed) length of the fields for each row are specified as a list of
    tuples ``(name, length)``. Each row can end with a line feed unless
    ``line_delimiter`` equals ``None``. Valid values are: ``'\n'``, ``'\r'``
    and ``'\r\n'``, in which case other values result in an
    `errors.DataFormatError`. Additionally ``'any'`` accepts any of the
    previous values.

    This is a generator that yields each row as a list of strings with one
    item for each field.

    :param fixed_source: either the path of the file to read (a string), \
      which is opened using ``encoding`` and closed when the generator \
      finishes, or an already opened file like object yielding text, which \
      is left open
    :param encoding: the encoding used to open ``fixed_source`` in case it \
      is a path
    :param field_name_and_lengths: tuples ``(name, length)`` where each \
      ``length`` must be at least 1
    :param line_delimiter: ``None``, ``'\n'``, ``'\r'``, ``'\r\n'`` or \
      ``'any'``
    :raises errors.DataFormatError: if a row ends before all its fields \
      could be read or a line delimiter does not match ``line_delimiter``
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variable for access in local function.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as specified
        by `line_delimiter` and read and validate them.

        The result is ``False`` if the end of the input was reached while
        reading the line delimiter, otherwise ``True``.

        In case `line_delimiter` is `None`, the result is always ``True`` even
        if the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            # Read as many characters as the expected line delimiter has;
            # for 'any' start with 1 character and possibly extend it below.
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # End of input reached instead of a line delimiter.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s' %
                        (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                # A specific line delimiter has to match exactly.
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s'
                    % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
        return result

    # A string is considered the path of a file this function has to open
    # (and close again); anything else is used as already opened file.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            # Number of fields of the current row read so far.
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # The first character of the item has already been read
                    # while processing the previous line delimiter, so only
                    # the remaining characters have to be read.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because the return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # The input ended in the middle of a row: report the
                        # fields that are still missing.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [length for _, length in field_name_and_lengths]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s"
                            % (names[previous_field_index], characters_needed_count, list_of_missing_field_names),
                            location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # Fewer characters than required could be read.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s"
                        % (field_name, field_length, item_length, _compat.text_repr(item)), location)
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            # An empty row means that the end of the input was reached
            # before the first field, so there is nothing to yield.
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        # Only close files opened by this function.
        if is_opened:
            fixed_file.close()
Beispiel #20
0
def fixed_rows(fixed_source,
               encoding,
               field_name_and_lengths,
               line_delimiter='any'):
    r"""
    Generate the rows found in ``fixed_source``, where every row is a
    sequence of fields with a fixed number of characters each.

    :param fixed_source: either a string, which is treated as path of a
        file that is opened for reading using ``encoding`` and closed again
        once the generator finishes, or an already opened text stream whose
        ``read()`` yields text strings; such a stream is not closed here
    :param encoding: the encoding used to open ``fixed_source`` in case it
        is a path; must not be ``None``
    :param field_name_and_lengths: a list of tuples ``(name, length)``
        describing the fields of a row in order; each length must be at
        least 1; the list is iterated multiple times
    :param line_delimiter: the characters expected after each row; must be
        one of ``_VALID_FIXED_LINE_DELIMITERS``. The code handles ``'\n'``,
        ``'\r'``, ``'\r\n'``, ``'any'`` (which accepts any of the
        ``_VALID_FIXED_ANY_LINE_DELIMITERS``) and ``None`` (rows follow each
        other without any delimiter)

    :return: a generator yielding each row as list of strings, one item
        per field

    :raises errors.DataFormatError: if the input ends in the middle of a
        row, or if the characters after a row do not match
        ``line_delimiter``
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (
            name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variables for access in the local function below. The
    # location is attached to every `errors.DataFormatError` raised here.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        Read and validate the line delimiter expected at the current
        position of `fixed_file` as specified by `line_delimiter`.

        The result is ``False`` if the end of input was reached while
        reading the delimiter, otherwise ``True``. With ``'any'``, a
        character read after a ``'\\r'`` that is not a ``'\\n'`` is stored
        in `unread_character_after_line_delimiter` so the next field can
        pick it up.

        In case `line_delimiter` is `None`, nothing is read and the result
        is always ``True`` even if the input has already reached its end.

        Raise `errors.DataFormatError` if the characters read are not an
        acceptable line delimiter.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # End of input directly after the last field of a row.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        # The input ends with a single '\r'.
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[
                            0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(
                        _VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s' %
                        (_compat.text_repr(actual_line_delimiter),
                         valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s' %
                    (_compat.text_repr(actual_line_delimiter),
                     _compat.text_repr(line_delimiter)), location)
        return result

    # A string is a path to open (and later close) here; anything else is
    # used as is and remains under the control of the caller.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            # Read one row, field by field.
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # The character unread while validating the previous line
                    # delimiter is the first character of this field, so only
                    # the remaining characters have to be read.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because they return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # The input ended after at least one field of the
                        # current row: report which fields are still missing.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [
                            length for _, length in field_name_and_lengths
                        ]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(
                            names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s"
                            % (names[previous_field_index],
                               characters_needed_count,
                               list_of_missing_field_names), location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # Fewer characters than requested: the input ended in the
                    # middle of this field.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s"
                        % (field_name, field_length, item_length,
                           _compat.text_repr(item)), location)
            # After a complete row, consume the line delimiter; stop in case
            # the input ends there.
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            # The row is empty if the end of input was reached before its first field.
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        # Only close files opened by this function.
        if is_opened:
            fixed_file.close()
Beispiel #21
0
 def test_can_build_human_readable_list(self):
     """
     Check the text built by ``_tools.human_readable_list()`` for lists
     with 0, 1, 2 and 3 items.
     """
     items_and_expected_texts = [
         ([], ""),
         (["a"], "'a'"),
         (["a", "b"], "'a' or 'b'"),
         (["a", "b", "c"], "'a', 'b' or 'c'"),
     ]
     for items, expected_text in items_and_expected_texts:
         self.assertEqual(_tools.human_readable_list(items), expected_text)
Beispiel #22
0
    def set_property(self, name, value, location=None):
        r"""
        Set data format property ``name`` to ``value`` possibly translating ``value`` from
        a human readable representation to an internal one.

        :param str name: any of the ``KEY_*`` constants; must be lower case, \
            blanks are treated like underscores
        :param value: the value to set the property to as it would show up in a CID. \
            In some cases, the value will be translated to an internal representation. \
            For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
            :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
        :type value: str or None
        :param location: the location passed on to any \
            :py:exc:`cutplace.errors.InterfaceError` raised by this method

        :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
        :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
        """
        assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
        assert name is not None
        assert name == name.lower(
        ), 'property name must be lower case: %r' % name
        assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS,
                                                KEY_LINE_DELIMITER))

        # A property is available for this format only if the instance has
        # an attribute with the same name and a leading underscore.
        name = name.replace(' ', '_')
        property_attribute_name = '_' + name
        if property_attribute_name not in self.__dict__:
            valid_property_names = _tools.human_readable_list(
                list(self.__dict__.keys()))
            raise errors.InterfaceError(
                _('data format property %s for format %s is %s but must be one of %s'
                  ) % (_compat.text_repr(name), self.format,
                       _compat.text_repr(value), valid_property_names),
                location)

        if name == KEY_ENCODING:
            try:
                codecs.lookup(value)
            except LookupError:
                # Report the rejected ``value``, not the encoding currently
                # set for this data format.
                raise errors.InterfaceError(
                    _('value for data format property %s is %s but must be a valid encoding'
                      ) % (_compat.text_repr(KEY_ENCODING),
                           _compat.text_repr(value)), location)
            self.encoding = value
        elif name == KEY_HEADER:
            self.header = DataFormat._validated_int_at_least_0(
                name, value, location)
        elif name == KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES:
            self.validate_header_row_against_field_names = DataFormat._validated_bool(
                KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES, value, location)
        elif name == KEY_ALLOWED_CHARACTERS:
            try:
                self._allowed_characters = ranges.Range(value)
            except errors.InterfaceError as error:
                raise errors.InterfaceError(
                    _('data format property %s must be a valid range: %s') %
                    (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error),
                    location)
        elif name == KEY_DECIMAL_SEPARATOR:
            self.decimal_separator = DataFormat._validated_choice(
                KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS,
                location)
        elif name == KEY_ESCAPE_CHARACTER:
            self.escape_character = DataFormat._validated_choice(
                KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS,
                location)
        elif name == KEY_ITEM_DELIMITER:
            item_delimiter = DataFormat._validated_character(
                KEY_ITEM_DELIMITER, value, location)
            if item_delimiter == '\x00':
                raise errors.InterfaceError(
                    _("data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)"
                      ) % _compat.text_repr(KEY_ITEM_DELIMITER), location)
            self.item_delimiter = item_delimiter
        elif name == KEY_LINE_DELIMITER:
            # NOTE(review): the assertion above permits ``value=None`` for
            # this property, but ``None.lower()`` raises AttributeError;
            # confirm whether ``None`` needs to be handled here.
            try:
                self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[
                    value.lower()]
            except KeyError:
                raise errors.InterfaceError(
                    _('line delimiter %s must be changed to one of: %s') %
                    (_compat.text_repr(value),
                     _tools.human_readable_list(
                         self._VALID_LINE_DELIMITER_TEXTS)), location)
        elif name == KEY_QUOTE_CHARACTER:
            self.quote_character = DataFormat._validated_choice(
                KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
        elif name == KEY_SHEET:
            self.sheet = DataFormat._validated_int_at_least_0(
                KEY_SHEET, value, location)
        elif name == KEY_SKIP_INITIAL_SPACE:
            self.skip_initial_space = DataFormat._validated_bool(
                KEY_SKIP_INITIAL_SPACE, value, location)
        elif name == KEY_THOUSANDS_SEPARATOR:
            self.thousands_separator = DataFormat._validated_choice(
                KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS,
                location)
        elif name == KEY_QUOTING:
            # The readable choice is translated to the respective quoting
            # constant using ``READABLE_TO_CSV_QUOTING_FORMAT``.
            result = DataFormat._validated_choice(KEY_QUOTING,
                                                  value,
                                                  _VALID_QUOTING,
                                                  location,
                                                  ignore_case=True)
            self.quoting = READABLE_TO_CSV_QUOTING_FORMAT[result]
        elif name == KEY_STRICT_FIELD_NAMES:
            self.strict_field_names = DataFormat._validated_bool(
                KEY_STRICT_FIELD_NAMES, value, location)
        else:
            # The attribute exists but no branch above handles the property.
            assert False, 'name=%r' % name
Beispiel #23
0
    def set_property(self, name, value, location=None):
        r"""
        Set data format property ``name`` to ``value`` possibly translating ``value`` from
        a human readable representation to an internal one.

        :param str name: any of the ``KEY_*`` constants; must be lower case, \
            blanks are treated like underscores
        :param value: the value to set the property to as it would show up in a CID. \
            In some cases, the value will be translated to an internal representation. \
            For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
            :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
        :type value: str or None
        :param location: the location passed on to any \
            :py:exc:`cutplace.errors.InterfaceError` raised by this method

        :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
        :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
        """
        assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
        assert name is not None
        assert name == name.lower(), 'property name must be lower case: %r' % name
        assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

        # A property is available for this format only if the instance has
        # an attribute with the same name and a leading underscore.
        name = name.replace(' ', '_')
        property_attribute_name = '_' + name
        if property_attribute_name not in self.__dict__:
            valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
            raise errors.InterfaceError(
                'data format property %s for format %s is %s but must be one of %s'
                % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names), location)

        if name == KEY_ENCODING:
            try:
                codecs.lookup(value)
            except LookupError:
                # Report the rejected ``value``, not the encoding currently
                # set for this data format.
                raise errors.InterfaceError(
                    'value for data format property %s is %s but must be a valid encoding'
                    % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(value)), location)
            self.encoding = value
        elif name == KEY_HEADER:
            self.header = DataFormat._validated_int_at_least_0(name, value, location)
        elif name == KEY_ALLOWED_CHARACTERS:
            try:
                self._allowed_characters = ranges.Range(value)
            except errors.InterfaceError as error:
                raise errors.InterfaceError(
                    'data format property %s must be a valid range: %s'
                    % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error), location)
        elif name == KEY_DECIMAL_SEPARATOR:
            self.decimal_separator = DataFormat._validated_choice(
                KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
        elif name == KEY_ESCAPE_CHARACTER:
            self.escape_character = DataFormat._validated_choice(
                KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
        elif name == KEY_ITEM_DELIMITER:
            item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
            if item_delimiter == '\x00':
                raise errors.InterfaceError(
                    "data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)"
                    % _compat.text_repr(KEY_ITEM_DELIMITER), location)
            self.item_delimiter = item_delimiter
        elif name == KEY_LINE_DELIMITER:
            # NOTE(review): the assertion above permits ``value=None`` for
            # this property, but ``None.lower()`` raises AttributeError;
            # confirm whether ``None`` needs to be handled here.
            try:
                self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
            except KeyError:
                raise errors.InterfaceError(
                    'line delimiter %s must be changed to one of: %s'
                    % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                    location)
        elif name == KEY_QUOTE_CHARACTER:
            self.quote_character = DataFormat._validated_choice(
                KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
        elif name == KEY_SHEET:
            self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
        elif name == KEY_SKIP_INITIAL_SPACE:
            self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
        elif name == KEY_THOUSANDS_SEPARATOR:
            self.thousands_separator = DataFormat._validated_choice(
                KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
        else:
            # The attribute exists but no branch above handles the property.
            assert False, 'name=%r' % name
Beispiel #24
0
 def test_can_build_human_readable_list(self):
     """
     Check the text built by ``_tools.human_readable_list()`` for lists
     with 0, 1, 2 and 3 items.
     """
     expected_text_for_items = (
         ([], ''),
         (['a'], "'a'"),
         (['a', 'b'], "'a' or 'b'"),
         (['a', 'b', 'c'], "'a', 'b' or 'c'"),
     )
     for items, expected_text in expected_text_for_items:
         actual_text = _tools.human_readable_list(items)
         self.assertEqual(actual_text, expected_text)
Beispiel #25
0
def main(arguments):
    """
    Convert an ODS file to CSV or reStructuredText as described by the
    command line options in ``arguments``.

    Without a target path, the target is the source path with its suffix
    replaced by the name of the target format. Invalid options are reported
    using ``parser.error()``. If the conversion fails, the error is logged
    and the process exits with code 1.

    :param arguments: the command line arguments without the program name
    """
    assert arguments is not None

    csv_format = "csv"
    rst_format = "rst"
    supported_formats = [csv_format, rst_format]
    default_format = csv_format
    default_sheet = 1

    parser = argparse.ArgumentParser(description='convert ODS file to other formats')
    format_help = "target format: %s (default: %s)" % (
        _tools.human_readable_list(supported_formats), default_format)
    parser.add_argument(
        "-f", "--format", metavar="FORMAT", default=default_format,
        choices=sorted(supported_formats), dest="format", help=format_help)
    parser.add_argument(
        "-1", "--heading", action="store_true", dest="firstRowIsHeading",
        help="render first row as heading")
    parser.add_argument(
        "-s", "--sheet", metavar="SHEET", default=default_sheet, type=int,
        dest="sheet", help="sheet to convert (default: %d)" % default_sheet)
    parser.add_argument(
        'source_ods_path', metavar='ODS-FILE', help='the ODS file to convert')
    parser.add_argument(
        'target_path', metavar='TARGET-FILE', nargs='?',
        help='the target file to write')
    args = parser.parse_args(arguments)

    # Additional command line argument validation.
    if args.sheet < 1:
        parser.error(
            "option --sheet is %d but must be at least 1" % args.sheet)
    if args.firstRowIsHeading and (args.format == csv_format):
        parser.error("option --heading can not be used with --format=csv")

    # Derive the target path from the source path unless specified.
    if args.target_path is None:
        assert args.format in supported_formats
        args.target_path = _tools.with_suffix(
            args.source_ods_path, '.' + args.format)

    source_path = args.source_ods_path
    target_path = args.target_path
    target_format = args.format
    _log.info(
        "convert %r to %r using format %r",
        source_path, target_path, target_format)
    try:
        if target_format == csv_format:
            toCsv(source_path, target_path, sheet=args.sheet)
        elif target_format == rst_format:
            toRst(
                source_path, target_path,
                firstRowIsHeading=args.firstRowIsHeading, sheet=args.sheet)
        else:  # pragma: no cover
            raise NotImplementedError("format=%r" % target_format)
    except Exception as error:
        # Environment errors are logged as plain error message, anything
        # else is logged including the stack trace.
        if isinstance(error, EnvironmentError):
            log_failure = _log.error
        else:
            log_failure = _log.exception
        log_failure("cannot convert ods: %s", error)
        sys.exit(1)