Example #1
0
    def __init__(self, fieldName, isAllowedToBeEmpty, length, rule, dataFormat):
        """
        Set up a choice field whose possible values are listed in ``rule``.

        ``rule`` is a list of choices separated by commas (,). The choices are collected in
        ``self.choices`` in the order in which they appear in ``rule``. All parameters are
        passed on to the base class together with an ``emptyValue`` of ``""``.

        Raise `FieldSyntaxError` if ``rule``

        * starts with a comma or contains a comma directly after another comma,
        * contains a choice with an empty text,
        * contains a choice that is followed by anything but a comma or the end of the rule,
        * ends with a comma, or
        * does not contain any choice although ``self.isAllowedToBeEmpty`` is false.
        """
        super(ChoiceFieldFormat, self).__init__(fieldName, isAllowedToBeEmpty, length, rule, dataFormat, emptyValue="")
        self.choices = []

        # Split rule into tokens, ignoring white space.
        tokens = _tools.tokenizeWithoutSpace(rule)

        # Extract choices from rule tokens. `previousToky` is the token consumed right before
        # `toky` (or None at the very beginning) so the error message about a misplaced comma
        # can show what actually preceded it.
        previousToky = None
        toky = tokens.next()
        while not _tools.isEofToken(toky):
            if _tools.isCommaToken(toky):
                # Handle comma at the beginning of the rule or comma after comma without choice.
                if previousToky is not None:
                    previousTokyText = previousToky[1]
                else:
                    previousTokyText = None
                raise FieldSyntaxError(u"choice value must precede a comma (,) but found: %r" % previousTokyText)
            choice = _tools.tokenText(toky)
            if not choice:
                raise FieldSyntaxError(
                    u"choice field must be allowed to be empty instead of containing an empty choice"
                )
            self.choices.append(choice)
            previousToky = toky
            toky = tokens.next()
            if not _tools.isEofToken(toky):
                if not _tools.isCommaToken(toky):
                    raise FieldSyntaxError(u"comma (,) must follow choice value %r but found: %r" % (choice, toky[1]))
                # Process next choice after comma.
                previousToky = toky
                toky = tokens.next()
                if _tools.isEofToken(toky):
                    raise FieldSyntaxError(u"trailing comma (,) must be removed")
        if not self.isAllowedToBeEmpty and not self.choices:
            raise FieldSyntaxError(u"choice field without any choices must be allowed to be empty")
Example #2
0
    def __init__(self, description, rule, availableFieldNames, location=None):
        """
        Set up a uniqueness check for the field names listed in ``rule``.

        ``rule`` is tokenized using Python's `tokenize` module. Each field name must be a
        Python name token, must be found in ``availableFieldNames`` by
        `fields.getFieldNameIndex()` and must occur only once in ``rule``. Accepted names are
        appended to ``self.fieldNamesToCheck`` in the order they are found.

        Raise `CheckSyntaxError` if a field name is malformed, unknown or duplicated, or if a
        token following a field name is not a comma (,).
        """
        super(IsUniqueCheck, self).__init__(description, rule, availableFieldNames, location)

        self.fieldNamesToCheck = []

        # Extract field names to check from rule.
        ruleReadLine = StringIO.StringIO(rule).readline
        toky = tokenize.generate_tokens(ruleReadLine)
        # True while a field name is expected, False while a comma is expected.
        afterComma = True
        nextToken = toky.next()
        # Names seen so far, used to detect duplicates.
        uniqueFieldNames = set()
        # NOTE(review): as visible here, the loop body neither advances `nextToken` nor toggles
        # `afterComma`; presumably the remainder of the loop is missing from this excerpt --
        # confirm against the complete source.
        while not _tools.isEofToken(nextToken):
            tokenType = nextToken[0]
            tokenValue = nextToken[1]
            if afterComma:
                if tokenType != tokenize.NAME:
                    raise CheckSyntaxError(u"field name must contain only ASCII letters, numbers and underscores (_) "
                                           + "but found: %r [token type=%r]" % (tokenValue, tokenType))
                try:
                    # Called only for its side effect of raising `FieldLookupError` on unknown
                    # names; the returned index is discarded.
                    fields.getFieldNameIndex(tokenValue, availableFieldNames)
                    if tokenValue in uniqueFieldNames:
                        raise CheckSyntaxError(u"duplicate field name for unique check must be removed: %s" % tokenValue)
                    uniqueFieldNames.add(tokenValue)
                except fields.FieldLookupError, error:
                    # Report lookup failures as syntax errors of the check.
                    raise CheckSyntaxError(unicode(error))
                self.fieldNamesToCheck.append(tokenValue)
            elif not _tools.isCommaToken(nextToken):
                raise CheckSyntaxError(u"after field name a comma (,) must follow but found: %r" % (tokenValue))
Example #3
0
    def __init__(self, fieldName, isAllowedToBeEmpty, length, rule,
                 dataFormat):
        """
        Set up a choice field whose possible values are listed in ``rule``.

        ``rule`` is a list of choices separated by commas (,). The choices
        are collected in ``self.choices`` in the order in which they appear
        in ``rule``. All parameters are passed on to the base class together
        with an ``emptyValue`` of ``""``.

        Raise `FieldSyntaxError` if ``rule``

        * starts with a comma or contains a comma directly after another
          comma,
        * contains a choice with an empty text,
        * contains a choice that is followed by anything but a comma or the
          end of the rule,
        * ends with a comma, or
        * does not contain any choice although ``self.isAllowedToBeEmpty``
          is false.
        """
        super(ChoiceFieldFormat, self).__init__(fieldName,
                                                isAllowedToBeEmpty,
                                                length,
                                                rule,
                                                dataFormat,
                                                emptyValue="")
        self.choices = []

        # Split rule into tokens, ignoring white space.
        tokens = _tools.tokenizeWithoutSpace(rule)

        # Extract choices from rule tokens. `previousToky` is the token
        # consumed right before `toky` (or None at the very beginning) so
        # the error message about a misplaced comma can show what actually
        # preceded it.
        previousToky = None
        toky = tokens.next()
        while not _tools.isEofToken(toky):
            if _tools.isCommaToken(toky):
                # Handle comma at the beginning of the rule or comma after
                # comma without choice.
                if previousToky is not None:
                    previousTokyText = previousToky[1]
                else:
                    previousTokyText = None
                raise FieldSyntaxError(
                    u"choice value must precede a comma (,) but found: %r" %
                    previousTokyText)
            choice = _tools.tokenText(toky)
            if not choice:
                raise FieldSyntaxError(
                    u"choice field must be allowed to be empty instead of containing an empty choice"
                )
            self.choices.append(choice)
            previousToky = toky
            toky = tokens.next()
            if not _tools.isEofToken(toky):
                if not _tools.isCommaToken(toky):
                    raise FieldSyntaxError(
                        u"comma (,) must follow choice value %r but found: %r"
                        % (choice, toky[1]))
                # Process next choice after comma.
                previousToky = toky
                toky = tokens.next()
                if _tools.isEofToken(toky):
                    raise FieldSyntaxError(
                        u"trailing comma (,) must be removed")
        if not self.isAllowedToBeEmpty and not self.choices:
            raise FieldSyntaxError(
                u"choice field without any choices must be allowed to be empty"
            )
Example #4
0
def validatedFieldName(supposedFieldName, location=None):
    """
    Return the field name described by ``supposedFieldName``.

    The result is the text of the first token delivered by `_tools.tokenizeWithoutSpace()`.
    Raise a `FieldSyntaxError` pointing to ``location`` if this token is not a Python name, if
    it is a Python keyword or if it is followed by anything but the end of the input.
    """
    tokenStream = _tools.tokenizeWithoutSpace(supposedFieldName)
    firstType, fieldName, _, _, _ = tokenStream.next()

    # The first token must be a name that is not reserved by Python.
    if firstType != token.NAME:
        raise FieldSyntaxError(
            u"field name must be a valid Python name consisting of ASCII letters, underscore (%r) and digits but is: %r"
            % ("_", fieldName),
            location
        )
    if keyword.iskeyword(fieldName):
        raise FieldSyntaxError(u"field name must not be a Python keyword but is: %r" % fieldName, location)

    # Nothing must follow the name.
    followingToken = tokenStream.next()
    if _tools.isEofToken(followingToken):
        return fieldName
    raise FieldSyntaxError(u"field name must be a single word but is: %r" % supposedFieldName, location)
Example #5
0
def validatedFieldName(supposedFieldName, location=None):
    """
    Check that ``supposedFieldName`` holds exactly one valid field name and
    return it.

    The name returned is the text of the first token obtained from
    `_tools.tokenizeWithoutSpace()`. A `FieldSyntaxError` pointing to
    ``location`` is raised in case this token is no Python name, is a Python
    keyword or is followed by further tokens.
    """
    nameTokens = _tools.tokenizeWithoutSpace(supposedFieldName)
    kind, name, _, _, _ = nameTokens.next()

    isPythonName = (kind == token.NAME)
    if not isPythonName:
        raise FieldSyntaxError(
            u"field name must be a valid Python name consisting of ASCII letters, underscore (%r) and digits but is: %r"
            % ("_", name), location)

    if keyword.iskeyword(name):
        keywordMessage = u"field name must not be a Python keyword but is: %r" % name
        raise FieldSyntaxError(keywordMessage, location)

    # Anything but the end of the input after the name means that more than
    # one word has been specified.
    trailingToken = nameTokens.next()
    if not _tools.isEofToken(trailingToken):
        wordMessage = u"field name must be a single word but is: %r" % supposedFieldName
        raise FieldSyntaxError(wordMessage, location)
    return name
Example #6
0
    def _validatedCharacter(self, key, value):
        r"""
        A single character intended as value for data format property ``key``
        derived from ``value``, which can be:

        * a decimal or hex number (prefixed with "0x") referring to the ASCII/Unicode of the character
        * a string containing a single character such as "\t".
        * a symbolic name such as "Tab".

        Anything else yields a `DataFormatSyntaxError`. This includes numbers
        that `chr()` cannot convert to a character.

        >>> format = DelimitedDataFormat()
        >>> format._validatedCharacter("x", "34")
        '"'
        >>> format._validatedCharacter("x", "9")
        '\t'
        >>> format._validatedCharacter("x", "0x9")
        '\t'
        >>> format._validatedCharacter("x", "Tab")
        '\t'
        >>> format._validatedCharacter("x", "\t")
        '\t'
        >>> format._validatedCharacter("x", "")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must be specified
        >>> format._validatedCharacter("x", "Tab Tab")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must describe a single character but is: 'Tab Tab'
        >>> format._validatedCharacter("x", "17.23")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: numeric value for data format property 'x' must be an integer but is: '17.23'
        >>> format._validatedCharacter("x", "Hugo")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: symbolic name 'Hugo' for data format property 'x' must be one of: 'cr', 'ff', 'lf', 'tab' or 'vt'
        >>> format._validatedCharacter("x", "( ")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '( '
        >>> format._validatedCharacter("x", "\"\\")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '"\\'
        >>> format._validatedCharacter("x", "\"abc\"")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: text for data format property 'x' must be a single character but is: '"abc"'
        """
        # TODO: Consolidate code with `ranges.__init__()`.
        assert key
        assert value is not None
        if len(value) == 1 and (value < "0" or value > "9"):
            # A single character that is not a digit stands for itself; a
            # single digit is processed as number below.
            result = value
        else:
            result = None
            tokens = tokenize.generate_tokens(
                StringIO.StringIO(value).readline)
            nextToken = tokens.next()
            if _tools.isEofToken(nextToken):
                raise DataFormatSyntaxError(
                    u"value for data format property %r must be specified" %
                    key)
            nextType = nextToken[0]
            nextValue = nextToken[1]
            if nextType == token.NUMBER:
                try:
                    if nextValue[:2].lower() == "0x":
                        nextValue = nextValue[2:]
                        base = 16
                    else:
                        base = 10
                    longValue = long(nextValue, base)
                except ValueError:
                    raise DataFormatSyntaxError(
                        u"numeric value for data format property %r must be an integer but is: %r"
                        % (key, value))
            elif nextType == token.NAME:
                try:
                    longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
                except KeyError:
                    validSymbols = _tools.humanReadableList(
                        sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                    raise DataFormatSyntaxError(
                        u"symbolic name %r for data format property %r must be one of: %s"
                        % (value, key, validSymbols))
            elif nextType == token.STRING:
                # A quoted single character takes exactly 3 characters:
                # quote, character, quote.
                if len(nextValue) != 3:
                    raise DataFormatSyntaxError(
                        u"text for data format property %r must be a single character but is: %r"
                        % (key, value))
                leftQuote = nextValue[0]
                rightQuote = nextValue[2]
                assert leftQuote in "\"\'", u"leftQuote=%r" % leftQuote
                assert rightQuote in "\"\'", u"rightQuote=%r" % rightQuote
                longValue = ord(nextValue[1])
            else:
                raise DataFormatSyntaxError(
                    u"value for data format property %r must a number, a single character or a symbolic name but is: %r"
                    % (key, value))
            # Ensure there are no further tokens.
            nextToken = tokens.next()
            if not _tools.isEofToken(nextToken):
                raise DataFormatSyntaxError(
                    u"value for data format property %r must describe a single character but is: %r"
                    % (key, value))

            assert longValue is not None
            assert longValue >= 0
            try:
                result = chr(longValue)
            except (ValueError, OverflowError):
                # `chr()` rejects codes outside the range it supports; report
                # this as syntax error of the data format instead of leaking
                # a low level exception to the caller.
                raise DataFormatSyntaxError(
                    u"value for data format property %r must be a valid character code but is: %r"
                    % (key, value))
        assert result is not None
        return result
Example #7
0
    def __init__(self, text, default=None):
        """
        Setup a range as specified by ``text``.

        ``text`` must be of the form "lower:upper" or "limit". In case ``text`` is empty (""), any
        value will be accepted by `validate()`. For example, "1:40" accepts values between 1
        and 40.

        Several items can be separated by commas (,), for example "1:5, 10:20". Either limit of
        an item can be omitted ("x:" or ":y"). A limit can be a decimal integer, a hex integer
        prefixed with "0x", a symbolic name found in `tools.SYMBOLIC_NAMES_MAP` or a quoted
        single character, which counts as its `ord()` value. A number can be preceded by a
        hyphen (-) to negate it.

        ``default`` is an alternative text to use in case ``text`` is ``None`` or empty.

        Raise `RangeSyntaxError` if ``text`` cannot be parsed, if the lower limit of an item
        exceeds its upper limit or if two items overlap according to `_itemsOverlap()`.
        """
        assert default is None or default.strip(), u"default=%r" % default

        # Find out if a `text` has been specified and if not, use optional `default` instead.
        hasText = (text is not None) and text.strip()
        if not hasText and default is not None:
            text = default
            hasText = True

        if not hasText:
            # Use empty ranges.
            self._description = None
            self._items = None
        else:
            self._description = text
            self._items = []
            # TODO: Consolidate code with `DelimitedDataFormat._validatedCharacter()`.
            tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
            endReached = False
            # Each pass of the outer loop parses one item up to the next comma or the end.
            while not endReached:
                lower = None
                upper = None
                colonFound = False
                afterHyphen = False
                nextToken = tokens.next()
                while not _tools.isEofToken(nextToken) and not _tools.isCommaToken(nextToken):
                    nextType = nextToken[0]
                    nextValue = nextToken[1]
                    if nextType in (token.NAME, token.NUMBER, token.STRING):
                        if afterHyphen and (nextType != token.NUMBER):
                            # Only numbers can be negated; reject symbolic names and text
                            # right here instead of reporting a misleading error later.
                            raise RangeSyntaxError(u"hyphen (-) must be followed by number but found: %r" % nextValue)
                        if nextType == token.NUMBER:
                            try:
                                if nextValue[:2].lower() == "0x":
                                    nextValue = nextValue[2:]
                                    base = 16
                                else:
                                    base = 10
                                longValue = long(nextValue, base)
                            except ValueError:
                                raise RangeSyntaxError(u"number must be an integer but is: %r" % nextValue)
                            if afterHyphen:
                                longValue = - 1 * longValue
                                afterHyphen = False
                        elif nextType == token.NAME:
                            try:
                                longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
                            except KeyError:
                                validSymbols = _tools.humanReadableList(sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                                raise RangeSyntaxError(u"symbolic name %r must be one of: %s" % (nextValue, validSymbols))
                        elif nextType == token.STRING:
                            # A quoted single character takes exactly 3 characters: quote,
                            # character, quote.
                            if len(nextValue) != 3:
                                raise RangeSyntaxError(u"text for range must contain a single character but is: %r" % nextValue)
                            leftQuote = nextValue[0]
                            rightQuote = nextValue[2]
                            assert leftQuote in "\"\'", u"leftQuote=%r" % leftQuote
                            assert rightQuote in "\"\'", u"rightQuote=%r" % rightQuote
                            longValue = ord(nextValue[1])
                        # Values before the colon are the lower limit, values after it the upper.
                        if colonFound:
                            if upper is None:
                                upper = longValue
                            else:
                                raise RangeSyntaxError(u"range must have at most lower and upper limit but found another number: %r" % nextValue)
                        elif lower is None:
                            lower = longValue
                        else:
                            raise RangeSyntaxError(u"number must be followed by colon (:) but found: %r" % nextValue)
                    elif afterHyphen:
                        raise RangeSyntaxError(u"hyphen (-) must be followed by number but found: %r" % nextValue)
                    elif (nextType == token.OP) and (nextValue == "-"):
                        afterHyphen = True
                    elif (nextType == token.OP) and (nextValue == ":"):
                        if colonFound:
                            raise RangeSyntaxError(u"range item must contain at most one colon (:)")
                        colonFound = True
                    else:
                        message = u"range must be specified using integer numbers, text, symbols and colon (:) but found: %r [token type: %r]" % (nextValue, nextType)
                        raise RangeSyntaxError(message)
                    nextToken = tokens.next()
                if afterHyphen:
                    raise RangeSyntaxError(u"hyphen (-) at end must be followed by number")

                # Decide upon the result.
                if (lower is None):
                    if (upper is None):
                        if colonFound:
                            # Handle ":".
                            # TODO: Handle ":" same as ""?
                            raise RangeSyntaxError(u"colon (:) must be preceded and/or succeeded by number")
                        else:
                            # Handle "".
                            result = None
                    else:
                        assert colonFound
                        # Handle ":y".
                        result = (None, upper)
                elif colonFound:
                    # Handle "x:" and "x:y".
                    if (upper is not None) and (lower > upper):
                        raise RangeSyntaxError(u"lower range %d must be less or equal to upper range %d" % (lower, upper))
                    result = (lower, upper)
                else:
                    # Handle "x".
                    result = (lower, lower)
                if result is not None:
                    for item in self._items:
                        if self._itemsOverlap(item, result):
                            raise RangeSyntaxError(u"range items must not overlap: %r and %r"
                                                   % (self._repr_item(item), self._repr_item(result)))
                    self._items.append(result)
                if _tools.isEofToken(nextToken):
                    endReached = True
Example #8
0
    def __init__(self, text, default=None):
        """
        Setup a range as specified by ``text``.

        ``text`` must be of the form "lower:upper" or "limit". In case ``text`` is empty (""), any
        value will be accepted by `validate()`. For example, "1:40" accepts values between 1
        and 40.

        Several items can be separated by commas (,), for example "1:5, 10:20".
        Either limit of an item can be omitted ("x:" or ":y"). A limit can be
        a decimal integer, a hex integer prefixed with "0x", a symbolic name
        found in `tools.SYMBOLIC_NAMES_MAP` or a quoted single character,
        which counts as its `ord()` value. A number can be preceded by a
        hyphen (-) to negate it.

        ``default`` is an alternative text to use in case ``text`` is ``None`` or empty.

        Raise `RangeSyntaxError` if ``text`` cannot be parsed, if the lower
        limit of an item exceeds its upper limit or if two items overlap
        according to `_itemsOverlap()`.
        """
        assert default is None or default.strip(), u"default=%r" % default

        # Find out if a `text` has been specified and if not, use optional `default` instead.
        hasText = (text is not None) and text.strip()
        if not hasText and default is not None:
            text = default
            hasText = True

        if not hasText:
            # Use empty ranges.
            self._description = None
            self._items = None
        else:
            self._description = text
            self._items = []
            # TODO: Consolidate code with `DelimitedDataFormat._validatedCharacter()`.
            tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
            endReached = False
            # Each pass of the outer loop parses one item up to the next
            # comma or the end.
            while not endReached:
                lower = None
                upper = None
                colonFound = False
                afterHyphen = False
                nextToken = tokens.next()
                while not _tools.isEofToken(
                        nextToken) and not _tools.isCommaToken(nextToken):
                    nextType = nextToken[0]
                    nextValue = nextToken[1]
                    if nextType in (token.NAME, token.NUMBER, token.STRING):
                        if afterHyphen and (nextType != token.NUMBER):
                            # Only numbers can be negated; reject symbolic
                            # names and text right here instead of reporting
                            # a misleading error later.
                            raise RangeSyntaxError(
                                u"hyphen (-) must be followed by number but found: %r"
                                % nextValue)
                        if nextType == token.NUMBER:
                            try:
                                if nextValue[:2].lower() == "0x":
                                    nextValue = nextValue[2:]
                                    base = 16
                                else:
                                    base = 10
                                longValue = long(nextValue, base)
                            except ValueError:
                                raise RangeSyntaxError(
                                    u"number must be an integer but is: %r" %
                                    nextValue)
                            if afterHyphen:
                                longValue = -1 * longValue
                                afterHyphen = False
                        elif nextType == token.NAME:
                            try:
                                longValue = tools.SYMBOLIC_NAMES_MAP[
                                    nextValue.lower()]
                            except KeyError:
                                validSymbols = _tools.humanReadableList(
                                    sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                                raise RangeSyntaxError(
                                    u"symbolic name %r must be one of: %s" %
                                    (nextValue, validSymbols))
                        elif nextType == token.STRING:
                            # A quoted single character takes exactly 3
                            # characters: quote, character, quote.
                            if len(nextValue) != 3:
                                raise RangeSyntaxError(
                                    u"text for range must contain a single character but is: %r"
                                    % nextValue)
                            leftQuote = nextValue[0]
                            rightQuote = nextValue[2]
                            assert leftQuote in "\"\'", u"leftQuote=%r" % leftQuote
                            assert rightQuote in "\"\'", u"rightQuote=%r" % rightQuote
                            longValue = ord(nextValue[1])
                        # Values before the colon are the lower limit,
                        # values after it the upper.
                        if colonFound:
                            if upper is None:
                                upper = longValue
                            else:
                                raise RangeSyntaxError(
                                    u"range must have at most lower and upper limit but found another number: %r"
                                    % nextValue)
                        elif lower is None:
                            lower = longValue
                        else:
                            raise RangeSyntaxError(
                                u"number must be followed by colon (:) but found: %r"
                                % nextValue)
                    elif afterHyphen:
                        raise RangeSyntaxError(
                            u"hyphen (-) must be followed by number but found: %r"
                            % nextValue)
                    elif (nextType == token.OP) and (nextValue == "-"):
                        afterHyphen = True
                    elif (nextType == token.OP) and (nextValue == ":"):
                        if colonFound:
                            raise RangeSyntaxError(
                                u"range item must contain at most one colon (:)"
                            )
                        colonFound = True
                    else:
                        message = u"range must be specified using integer numbers, text, symbols and colon (:) but found: %r [token type: %r]" % (
                            nextValue, nextType)
                        raise RangeSyntaxError(message)
                    nextToken = tokens.next()
                if afterHyphen:
                    raise RangeSyntaxError(
                        u"hyphen (-) at end must be followed by number")

                # Decide upon the result.
                if (lower is None):
                    if (upper is None):
                        if colonFound:
                            # Handle ":".
                            # TODO: Handle ":" same as ""?
                            raise RangeSyntaxError(
                                u"colon (:) must be preceded and/or succeeded by number"
                            )
                        else:
                            # Handle "".
                            result = None
                    else:
                        assert colonFound
                        # Handle ":y".
                        result = (None, upper)
                elif colonFound:
                    # Handle "x:" and "x:y".
                    if (upper is not None) and (lower > upper):
                        raise RangeSyntaxError(
                            u"lower range %d must be less or equal to upper range %d"
                            % (lower, upper))
                    result = (lower, upper)
                else:
                    # Handle "x".
                    result = (lower, lower)
                if result is not None:
                    for item in self._items:
                        if self._itemsOverlap(item, result):
                            raise RangeSyntaxError(
                                u"range items must not overlap: %r and %r" %
                                (self._repr_item(item),
                                 self._repr_item(result)))
                    self._items.append(result)
                if _tools.isEofToken(nextToken):
                    endReached = True
Example #9
0
    def _validatedCharacter(self, key, value):
        r"""
        A single character intended as value for data format property ``key``
        derived from ``value``, which can be:

        * a decimal or hex number (prefixed with "0x") referring to the ASCII/Unicode of the character
        * a string containing a single character such as "\t".
        * a symbolic name such as "Tab".

        Anything else yields a `DataFormatSyntaxError`. This includes numbers that `chr()`
        cannot convert to a character.

        >>> format = DelimitedDataFormat()
        >>> format._validatedCharacter("x", "34")
        '"'
        >>> format._validatedCharacter("x", "9")
        '\t'
        >>> format._validatedCharacter("x", "0x9")
        '\t'
        >>> format._validatedCharacter("x", "Tab")
        '\t'
        >>> format._validatedCharacter("x", "\t")
        '\t'
        >>> format._validatedCharacter("x", "")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must be specified
        >>> format._validatedCharacter("x", "Tab Tab")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must describe a single character but is: 'Tab Tab'
        >>> format._validatedCharacter("x", "17.23")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: numeric value for data format property 'x' must be an integer but is: '17.23'
        >>> format._validatedCharacter("x", "Hugo")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: symbolic name 'Hugo' for data format property 'x' must be one of: 'cr', 'ff', 'lf', 'tab' or 'vt'
        >>> format._validatedCharacter("x", "( ")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '( '
        >>> format._validatedCharacter("x", "\"\\")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '"\\'
        >>> format._validatedCharacter("x", "\"abc\"")
        Traceback (most recent call last):
            ...
        DataFormatSyntaxError: text for data format property 'x' must be a single character but is: '"abc"'
        """
        # TODO: Consolidate code with `ranges.__init__()`.
        assert key
        assert value is not None
        if len(value) == 1 and (value < "0" or value > "9"):
            # A single character that is not a digit stands for itself; a single digit is
            # processed as number below.
            result = value
        else:
            result = None
            tokens = tokenize.generate_tokens(StringIO.StringIO(value).readline)
            nextToken = tokens.next()
            if _tools.isEofToken(nextToken):
                raise DataFormatSyntaxError(u"value for data format property %r must be specified" % key)
            nextType = nextToken[0]
            nextValue = nextToken[1]
            if nextType == token.NUMBER:
                try:
                    if nextValue[:2].lower() == "0x":
                        nextValue = nextValue[2:]
                        base = 16
                    else:
                        base = 10
                    longValue = long(nextValue, base)
                except ValueError:
                    raise DataFormatSyntaxError(u"numeric value for data format property %r must be an integer but is: %r" % (key, value))
            elif nextType == token.NAME:
                try:
                    longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
                except KeyError:
                    validSymbols = _tools.humanReadableList(sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                    raise DataFormatSyntaxError(u"symbolic name %r for data format property %r must be one of: %s" % (value, key, validSymbols))
            elif nextType == token.STRING:
                # A quoted single character takes exactly 3 characters: quote, character, quote.
                if len(nextValue) != 3:
                    raise DataFormatSyntaxError(u"text for data format property %r must be a single character but is: %r" % (key, value))
                leftQuote = nextValue[0]
                rightQuote = nextValue[2]
                assert leftQuote in "\"\'", u"leftQuote=%r" % leftQuote
                assert rightQuote in "\"\'", u"rightQuote=%r" % rightQuote
                longValue = ord(nextValue[1])
            else:
                raise DataFormatSyntaxError(u"value for data format property %r must a number, a single character or a symbolic name but is: %r" % (key, value))
            # Ensure there are no further tokens.
            nextToken = tokens.next()
            if not _tools.isEofToken(nextToken):
                raise DataFormatSyntaxError(u"value for data format property %r must describe a single character but is: %r" % (key, value))

            assert longValue is not None
            assert longValue >= 0
            try:
                result = chr(longValue)
            except (ValueError, OverflowError):
                # `chr()` rejects codes outside the range it supports; report this as syntax
                # error of the data format instead of leaking a low level exception.
                raise DataFormatSyntaxError(u"value for data format property %r must be a valid character code but is: %r" % (key, value))
        assert result is not None
        return result