def __init__(self, fieldName, isAllowedToBeEmpty, length, rule, dataFormat):
    """
    Set up a choice field whose valid values are the comma separated
    choices listed in ``rule``.

    All arguments are passed on unchanged to the base class initializer,
    together with an ``emptyValue`` of ``""``. The choices found in ``rule``
    are collected in ``self.choices`` in the order they appear.

    Raise `FieldSyntaxError` if ``rule`` contains a comma that is not
    preceded by a choice, an empty choice, a choice that is not followed by
    a comma, a trailing comma, or if there are no choices at all while the
    field is not allowed to be empty.
    """
    super(ChoiceFieldFormat, self).__init__(fieldName, isAllowedToBeEmpty, length, rule, dataFormat, emptyValue="")
    self.choices = []

    # Split rule into tokens, ignoring white space.
    tokens = _tools.tokenizeWithoutSpace(rule)

    # Extract choices from rule tokens. `previousToky` is the token read
    # immediately before `toky` (`None` while `toky` is the first token) so
    # the error message for a misplaced comma can quote what preceded it.
    previousToky = None
    toky = tokens.next()
    while not _tools.isEofToken(toky):
        if _tools.isCommaToken(toky):
            # Handle comma after comma without choice.
            if previousToky:
                previousTokyText = previousToky[1]
            else:
                previousTokyText = None
            raise FieldSyntaxError(u"choice value must precede a comma (,) but found: %r" % previousTokyText)
        choice = _tools.tokenText(toky)
        if not choice:
            raise FieldSyntaxError(u"choice field must be allowed to be empty instead of containing an empty choice")
        self.choices.append(choice)
        previousToky = toky
        toky = tokens.next()
        if not _tools.isEofToken(toky):
            if not _tools.isCommaToken(toky):
                raise FieldSyntaxError(u"comma (,) must follow choice value %r but found: %r" % (choice, toky[1]))
            # Process next choice after comma.
            previousToky = toky
            toky = tokens.next()
            if _tools.isEofToken(toky):
                raise FieldSyntaxError(u"trailing comma (,) must be removed")
    # NOTE(review): `self.isAllowedToBeEmpty` is presumably set by the base
    # class initializer called above - confirm.
    if not self.isAllowedToBeEmpty and not self.choices:
        raise FieldSyntaxError(u"choice field without any choices must be allowed to be empty")
def __init__(self, description, rule, availableFieldNames, location=None):
    """
    Set up a unique check for the comma separated field names in ``rule``.

    All arguments are passed on unchanged to the base class initializer.
    The field names found in ``rule`` are collected in
    ``self.fieldNamesToCheck`` in the order they appear.

    Raise `CheckSyntaxError` if ``rule`` contains something other than names
    separated by commas, a name that `fields.getFieldNameIndex()` cannot
    find in ``availableFieldNames``, or the same name more than once.
    """
    super(IsUniqueCheck, self).__init__(description, rule, availableFieldNames, location)

    self.fieldNamesToCheck = []

    # Extract field names to check from rule.
    ruleReadLine = StringIO.StringIO(rule).readline
    toky = tokenize.generate_tokens(ruleReadLine)
    # `afterComma` is True whenever a field name is expected next, which
    # also is the case at the very beginning of the rule.
    afterComma = True
    nextToken = toky.next()
    uniqueFieldNames = set()
    while not _tools.isEofToken(nextToken):
        tokenType = nextToken[0]
        tokenValue = nextToken[1]
        if afterComma:
            if tokenType != tokenize.NAME:
                raise CheckSyntaxError(u"field name must contain only ASCII letters, numbers and underscores (_) "
                        + "but found: %r [token type=%r]" % (tokenValue, tokenType))
            try:
                # Called only for its side effect of raising
                # `FieldLookupError` on unknown field names.
                fields.getFieldNameIndex(tokenValue, availableFieldNames)
                if tokenValue in uniqueFieldNames:
                    raise CheckSyntaxError(u"duplicate field name for unique check must be removed: %s" % tokenValue)
                uniqueFieldNames.add(tokenValue)
            except fields.FieldLookupError as error:
                raise CheckSyntaxError(unicode(error))
            self.fieldNamesToCheck.append(tokenValue)
        elif not _tools.isCommaToken(nextToken):
            raise CheckSyntaxError(u"after field name a comma (,) must follow but found: %r" % (tokenValue))
        # Names and commas alternate, so flip the expectation and move on to
        # the next token; without this the loop would examine the same token
        # forever.
        afterComma = not afterComma
        nextToken = toky.next()
def __init__(self, fieldName, isAllowedToBeEmpty, length, rule, dataFormat):
    """
    Create a field format that accepts only the choices listed in ``rule``.

    ``rule`` is a comma separated list of choices which end up in
    ``self.choices``, preserving their order. The remaining arguments and
    ``rule`` itself are handed to the base class initializer, using ``""``
    as ``emptyValue``.

    A `FieldSyntaxError` is raised for a comma without a choice before it,
    an empty choice, a choice followed by anything but a comma, a trailing
    comma, and for a rule without choices unless the field is allowed to be
    empty.
    """
    super(ChoiceFieldFormat, self).__init__(
        fieldName, isAllowedToBeEmpty, length, rule, dataFormat,
        emptyValue="")
    self.choices = []

    # Split rule into tokens, ignoring white space.
    tokens = _tools.tokenizeWithoutSpace(rule)

    # Extract choices from rule tokens, remembering in `previousToky` the
    # token consumed just before `toky` (`None` at the start of the rule).
    previousToky = None
    toky = tokens.next()
    while not _tools.isEofToken(toky):
        if _tools.isCommaToken(toky):
            # Handle comma after comma without choice.
            previousTokyText = None
            if previousToky:
                previousTokyText = previousToky[1]
            raise FieldSyntaxError(
                u"choice value must precede a comma (,) but found: %r"
                % previousTokyText)
        choice = _tools.tokenText(toky)
        if not choice:
            raise FieldSyntaxError(
                u"choice field must be allowed to be empty instead of containing an empty choice"
            )
        self.choices.append(choice)
        previousToky, toky = toky, tokens.next()
        if not _tools.isEofToken(toky):
            if not _tools.isCommaToken(toky):
                raise FieldSyntaxError(
                    u"comma (,) must follow choice value %r but found: %r"
                    % (choice, toky[1]))
            # Process next choice after comma.
            previousToky, toky = toky, tokens.next()
            if _tools.isEofToken(toky):
                raise FieldSyntaxError(
                    u"trailing comma (,) must be removed")
    # NOTE(review): `self.isAllowedToBeEmpty` is presumably assigned by the
    # base class initializer called above - confirm.
    if not self.isAllowedToBeEmpty and not self.choices:
        raise FieldSyntaxError(
            u"choice field without any choices must be allowed to be empty"
        )
def validatedFieldName(supposedFieldName, location=None):
    """
    The field name described by ``supposedFieldName``, without any
    surrounding white space.

    A valid field name is a single Python name that is not a keyword.
    Anything else results in a `FieldSyntaxError` referring to ``location``.
    """
    tokenStream = _tools.tokenizeWithoutSpace(supposedFieldName)
    firstType, result, _, _, _ = tokenStream.next()
    if firstType != token.NAME:
        raise FieldSyntaxError(
            u"field name must be a valid Python name consisting of ASCII letters, underscore (%r) and digits but is: %r"
            % ("_", result),
            location)
    if keyword.iskeyword(result):
        raise FieldSyntaxError(u"field name must not be a Python keyword but is: %r" % result, location)

    # Nothing but the end of the input may follow the name.
    followingToken = tokenStream.next()
    if _tools.isEofToken(followingToken):
        return result
    raise FieldSyntaxError(u"field name must be a single word but is: %r" % supposedFieldName, location)
def validatedFieldName(supposedFieldName, location=None):
    """
    Check that ``supposedFieldName`` describes a valid field name and
    return it with surrounding white space removed.

    Raise a `FieldSyntaxError` pointing to ``location`` if the text does not
    start with a Python name, if that name is a Python keyword, or if
    anything follows the name.
    """
    nameTokens = _tools.tokenizeWithoutSpace(supposedFieldName)
    (kind, result, _, _, _) = nameTokens.next()

    # Collect the first complaint, if any, and raise it in a single place.
    complaint = None
    if kind != token.NAME:
        complaint = u"field name must be a valid Python name consisting of ASCII letters, underscore (%r) and digits but is: %r" % (
            "_", result)
    elif keyword.iskeyword(result):
        complaint = u"field name must not be a Python keyword but is: %r" % result
    elif not _tools.isEofToken(nameTokens.next()):
        complaint = u"field name must be a single word but is: %r" % supposedFieldName

    if complaint is not None:
        raise FieldSyntaxError(complaint, location)
    return result
def _validatedCharacter(self, key, value):
    r"""
    A single character intended as value for data format property ``key``
    derived from ``value``, which can be:

    * a decimal or hex number (prefixed with "0x") referring to the ASCII/Unicode of the character
    * a string containing a single character such as "\t".
    * a symbolic name such as "Tab".

    Anything else yields a `DataFormatSyntaxError`. This includes numbers
    that `chr()` cannot convert to a character and quoted text carrying a
    string prefix.

    ``key`` is used only for error messages and must not be empty;
    ``value`` must not be ``None``.

    >>> format = DelimitedDataFormat()
    >>> format._validatedCharacter("x", "34")
    '"'
    >>> format._validatedCharacter("x", "9")
    '\t'
    >>> format._validatedCharacter("x", "0x9")
    '\t'
    >>> format._validatedCharacter("x", "Tab")
    '\t'
    >>> format._validatedCharacter("x", "\t")
    '\t'
    >>> format._validatedCharacter("x", "")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must be specified
    >>> format._validatedCharacter("x", "Tab Tab")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must describe a single character but is: 'Tab Tab'
    >>> format._validatedCharacter("x", "17.23")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: numeric value for data format property 'x' must be an integer but is: '17.23'
    >>> format._validatedCharacter("x", "Hugo")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: symbolic name 'Hugo' for data format property 'x' must be one of: 'cr', 'ff', 'lf', 'tab' or 'vt'
    >>> format._validatedCharacter("x", "( ")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '( '
    >>> format._validatedCharacter("x", "\"\\")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '"\\'
    >>> format._validatedCharacter("x", "\"abc\"")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: text for data format property 'x' must be a single character but is: '"abc"'
    >>> format._validatedCharacter("x", "300")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must describe a valid character code but is: '300'
    """
    # TODO: Consolidate code with `ranges.__init__()`.
    assert key
    assert value is not None
    if len(value) == 1 and (value < "0" or value > "9"):
        # A single character other than a digit stands for itself; a single
        # digit is treated as character code below.
        result = value
    else:
        result = None
        tokens = tokenize.generate_tokens(StringIO.StringIO(value).readline)
        nextToken = tokens.next()
        if _tools.isEofToken(nextToken):
            raise DataFormatSyntaxError(u"value for data format property %r must be specified" % key)
        nextType = nextToken[0]
        nextValue = nextToken[1]
        if nextType == token.NUMBER:
            try:
                if nextValue[:2].lower() == "0x":
                    nextValue = nextValue[2:]
                    base = 16
                else:
                    base = 10
                longValue = long(nextValue, base)
            except ValueError:
                raise DataFormatSyntaxError(u"numeric value for data format property %r must be an integer but is: %r" % (key, value))
        elif nextType == token.NAME:
            try:
                longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
            except KeyError:
                validSymbols = _tools.humanReadableList(sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                raise DataFormatSyntaxError(u"symbolic name %r for data format property %r must be one of: %s" % (value, key, validSymbols))
        elif nextType == token.STRING:
            # Accept exactly one character between two quotes. A string
            # token of length 3 can also be an empty string with a prefix,
            # for example r"", so the quotes have to be checked explicitly
            # instead of relying on assertions.
            isQuotedCharacter = (len(nextValue) == 3) \
                and (nextValue[0] in "\"\'") and (nextValue[2] in "\"\'")
            if not isQuotedCharacter:
                raise DataFormatSyntaxError(u"text for data format property %r must be a single character but is: %r" % (key, value))
            longValue = ord(nextValue[1])
        else:
            raise DataFormatSyntaxError(u"value for data format property %r must a number, a single character or a symbolic name but is: %r" % (key, value))
        # Ensure there are no further tokens.
        nextToken = tokens.next()
        if not _tools.isEofToken(nextToken):
            raise DataFormatSyntaxError(u"value for data format property %r must describe a single character but is: %r" % (key, value))

        assert longValue is not None
        assert longValue >= 0
        try:
            result = chr(longValue)
        except (ValueError, OverflowError):
            # `chr()` rejects codes outside the range it supports; report
            # this like any other broken value.
            raise DataFormatSyntaxError(u"value for data format property %r must describe a valid character code but is: %r" % (key, value))
    assert result is not None
    return result
def __init__(self, text, default=None):
    """
    Setup a range as specified by ``text``.

    ``text`` must be of the form "lower:upper" or "limit". In case ``text``
    is empty (""), any value will be accepted by `validate()`. For example,
    "1:40" accepts values between 1 and 40.

    Several such items can be separated by commas, and either limit of
    "lower:upper" can be omitted. A limit is an integer number (decimal, or
    hexadecimal if prefixed with "0x"; a preceding hyphen negates it), a
    symbolic name found in `tools.SYMBOLIC_NAMES_MAP` or a single character
    in quotes.

    ``default`` is an alternative text to use in case ``text`` is ``None``
    or empty.

    The items are stored in ``self._items`` as list of ``(lower, upper)``
    tuples where an omitted limit is ``None``. Without any text, both
    ``self._items`` and ``self._description`` are ``None``.

    Raise `RangeSyntaxError` if the text is malformed, a lower limit is
    greater than its upper limit, or two items overlap according to
    `_itemsOverlap()`.
    """
    assert default is None or default.strip(), u"default=%r" % default

    # Find out if a `text` has been specified and if not, use optional `default` instead.
    hasText = (text is not None) and text.strip()
    if not hasText and default is not None:
        text = default
        hasText = True

    if not hasText:
        # Use empty ranges.
        self._description = None
        self._items = None
    else:
        self._description = text
        self._items = []
        # TODO: Consolidate code with `DelimitedDataFormat._validatedCharacter()`.
        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        endReached = False
        while not endReached:
            # Each pass parses one item up to the next comma or the end.
            lower = None
            upper = None
            colonFound = False
            afterHyphen = False
            nextToken = tokens.next()
            while not _tools.isEofToken(nextToken) and not _tools.isCommaToken(nextToken):
                nextType = nextToken[0]
                nextValue = nextToken[1]
                if afterHyphen and (nextType != token.NUMBER):
                    # Only numbers can be negated, so reject names, text
                    # and operators right where they show up.
                    raise RangeSyntaxError(u"hyphen (-) must be followed by number but found: %r" % nextValue)
                elif nextType in (token.NAME, token.NUMBER, token.STRING):
                    if nextType == token.NUMBER:
                        try:
                            if nextValue[:2].lower() == "0x":
                                nextValue = nextValue[2:]
                                base = 16
                            else:
                                base = 10
                            longValue = long(nextValue, base)
                        except ValueError:
                            raise RangeSyntaxError(u"number must be an integer but is: %r" % nextValue)
                        if afterHyphen:
                            longValue = -1 * longValue
                            afterHyphen = False
                    elif nextType == token.NAME:
                        try:
                            longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
                        except KeyError:
                            validSymbols = _tools.humanReadableList(sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                            raise RangeSyntaxError(u"symbolic name %r must be one of: %s" % (nextValue, validSymbols))
                    elif nextType == token.STRING:
                        # Accept exactly one character between two quotes.
                        # A string token of length 3 can also be an empty
                        # string with a prefix, for example r"", so check
                        # the quotes instead of asserting them.
                        isQuotedCharacter = (len(nextValue) == 3) \
                            and (nextValue[0] in "\"\'") and (nextValue[2] in "\"\'")
                        if not isQuotedCharacter:
                            raise RangeSyntaxError(u"text for range must contain a single character but is: %r" % nextValue)
                        longValue = ord(nextValue[1])
                    if colonFound:
                        if upper is None:
                            upper = longValue
                        else:
                            raise RangeSyntaxError("range must have at most lower and upper limit but found another number: %r" % nextValue)
                    elif lower is None:
                        lower = longValue
                    else:
                        raise RangeSyntaxError(u"number must be followed by colon (:) but found: %r" % nextValue)
                elif (nextType == token.OP) and (nextValue == "-"):
                    afterHyphen = True
                elif (nextType == token.OP) and (nextValue == ":"):
                    if colonFound:
                        raise RangeSyntaxError(u"range item must contain at most one colon (:)")
                    colonFound = True
                else:
                    message = u"range must be specified using integer numbers, text, symbols and colon (:) but found: %r [token type: %r]" % (nextValue, nextType)
                    raise RangeSyntaxError(message)
                nextToken = tokens.next()

            if afterHyphen:
                raise RangeSyntaxError(u"hyphen (-) at end must be followed by number")

            # Decide upon the result.
            if (lower is None):
                if (upper is None):
                    if colonFound:
                        # Handle ":".
                        # TODO: Handle ":" same as ""?
                        raise RangeSyntaxError(u"colon (:) must be preceded and/or succeeded by number")
                    else:
                        # Handle "".
                        result = None
                else:
                    assert colonFound
                    # Handle ":y".
                    result = (None, upper)
            elif colonFound:
                # Handle "x:" and "x:y".
                if (upper is not None) and (lower > upper):
                    raise RangeSyntaxError(u"lower range %d must be less or equal to upper range %d" % (lower, upper))
                result = (lower, upper)
            else:
                # Handle "x".
                result = (lower, lower)
            if result is not None:
                for item in self._items:
                    if self._itemsOverlap(item, result):
                        raise RangeSyntaxError(u"range items must not overlap: %r and %r"
                                % (self._repr_item(item), self._repr_item(result)))
                self._items.append(result)
            if _tools.isEofToken(nextToken):
                endReached = True
def __init__(self, text, default=None):
    """
    Setup a range as specified by ``text``.

    ``text`` must be of the form "lower:upper" or "limit". In case ``text``
    is empty (""), any value will be accepted by `validate()`. For example,
    "1:40" accepts values between 1 and 40.

    Multiple items of this form can be separated by commas. Both "lower:"
    and ":upper" are possible too. Each limit is either an integer number
    (decimal, or hexadecimal with a "0x" prefix; negative if preceded by a
    hyphen), a symbolic name from `tools.SYMBOLIC_NAMES_MAP` or a single
    quoted character.

    ``default`` is an alternative text to use in case ``text`` is ``None``
    or empty.

    On return, ``self._description`` is the text used and ``self._items``
    a list of ``(lower, upper)`` tuples with ``None`` for an omitted limit.
    If there is no text at all, both attributes are ``None``.

    Raise `RangeSyntaxError` for malformed text, for a lower limit greater
    than the upper limit and for items that overlap according to
    `_itemsOverlap()`.
    """
    assert default is None or default.strip(), u"default=%r" % default

    # Find out if a `text` has been specified and if not, use optional `default` instead.
    hasText = (text is not None) and text.strip()
    if not hasText and default is not None:
        text = default
        hasText = True

    if not hasText:
        # Use empty ranges.
        self._description = None
        self._items = None
    else:
        self._description = text
        self._items = []
        # TODO: Consolidate code with `DelimitedDataFormat._validatedCharacter()`.
        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        endReached = False
        while not endReached:
            # Parse a single item, which ends at a comma or at the end.
            lower = None
            upper = None
            colonFound = False
            afterHyphen = False
            nextToken = tokens.next()
            while not _tools.isEofToken(
                    nextToken) and not _tools.isCommaToken(nextToken):
                nextType = nextToken[0]
                nextValue = nextToken[1]
                if afterHyphen and (nextType != token.NUMBER):
                    # A hyphen negates numbers only; complain about
                    # anything else at the token that causes the problem.
                    raise RangeSyntaxError(
                        u"hyphen (-) must be followed by number but found: %r"
                        % nextValue)
                elif nextType in (token.NAME, token.NUMBER, token.STRING):
                    if nextType == token.NUMBER:
                        try:
                            if nextValue[:2].lower() == "0x":
                                nextValue = nextValue[2:]
                                base = 16
                            else:
                                base = 10
                            longValue = long(nextValue, base)
                        except ValueError:
                            raise RangeSyntaxError(
                                u"number must be an integer but is: %r"
                                % nextValue)
                        if afterHyphen:
                            longValue = -1 * longValue
                            afterHyphen = False
                    elif nextType == token.NAME:
                        try:
                            longValue = tools.SYMBOLIC_NAMES_MAP[
                                nextValue.lower()]
                        except KeyError:
                            validSymbols = _tools.humanReadableList(
                                sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                            raise RangeSyntaxError(
                                u"symbolic name %r must be one of: %s"
                                % (nextValue, validSymbols))
                    elif nextType == token.STRING:
                        # The token must be one character enclosed in
                        # quotes. Its length alone does not prove that
                        # because a prefixed empty string such as r"" has 3
                        # characters too, hence the explicit quote check.
                        leftQuote = nextValue[:1]
                        rightQuote = nextValue[-1:]
                        if (len(nextValue) != 3) \
                                or (leftQuote not in "\"\'") \
                                or (rightQuote not in "\"\'"):
                            raise RangeSyntaxError(
                                u"text for range must contain a single character but is: %r"
                                % nextValue)
                        longValue = ord(nextValue[1])
                    if colonFound:
                        if upper is None:
                            upper = longValue
                        else:
                            raise RangeSyntaxError(
                                "range must have at most lower and upper limit but found another number: %r"
                                % nextValue)
                    elif lower is None:
                        lower = longValue
                    else:
                        raise RangeSyntaxError(
                            u"number must be followed by colon (:) but found: %r"
                            % nextValue)
                elif (nextType == token.OP) and (nextValue == "-"):
                    afterHyphen = True
                elif (nextType == token.OP) and (nextValue == ":"):
                    if colonFound:
                        raise RangeSyntaxError(
                            u"range item must contain at most one colon (:)"
                        )
                    colonFound = True
                else:
                    message = u"range must be specified using integer numbers, text, symbols and colon (:) but found: %r [token type: %r]" % (
                        nextValue, nextType)
                    raise RangeSyntaxError(message)
                nextToken = tokens.next()

            if afterHyphen:
                raise RangeSyntaxError(
                    u"hyphen (-) at end must be followed by number")

            # Decide upon the result.
            if (lower is None):
                if (upper is None):
                    if colonFound:
                        # Handle ":".
                        # TODO: Handle ":" same as ""?
                        raise RangeSyntaxError(
                            u"colon (:) must be preceded and/or succeeded by number"
                        )
                    else:
                        # Handle "".
                        result = None
                else:
                    assert colonFound
                    # Handle ":y".
                    result = (None, upper)
            elif colonFound:
                # Handle "x:" and "x:y".
                if (upper is not None) and (lower > upper):
                    raise RangeSyntaxError(
                        u"lower range %d must be less or equal to upper range %d"
                        % (lower, upper))
                result = (lower, upper)
            else:
                # Handle "x".
                result = (lower, lower)
            if result is not None:
                for item in self._items:
                    if self._itemsOverlap(item, result):
                        raise RangeSyntaxError(
                            u"range items must not overlap: %r and %r"
                            % (self._repr_item(item),
                               self._repr_item(result)))
                self._items.append(result)
            if _tools.isEofToken(nextToken):
                endReached = True
def _validatedCharacter(self, key, value):
    r"""
    A single character intended as value for data format property ``key``
    derived from ``value``, which can be:

    * a decimal or hex number (prefixed with "0x") referring to the ASCII/Unicode of the character
    * a string containing a single character such as "\t".
    * a symbolic name such as "Tab".

    Anything else yields a `DataFormatSyntaxError`, including numbers
    `chr()` refuses to convert and quoted text with a string prefix.

    ``key`` must not be empty and shows up in error messages only;
    ``value`` must not be ``None``.

    >>> format = DelimitedDataFormat()
    >>> format._validatedCharacter("x", "34")
    '"'
    >>> format._validatedCharacter("x", "9")
    '\t'
    >>> format._validatedCharacter("x", "0x9")
    '\t'
    >>> format._validatedCharacter("x", "Tab")
    '\t'
    >>> format._validatedCharacter("x", "\t")
    '\t'
    >>> format._validatedCharacter("x", "")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must be specified
    >>> format._validatedCharacter("x", "Tab Tab")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must describe a single character but is: 'Tab Tab'
    >>> format._validatedCharacter("x", "17.23")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: numeric value for data format property 'x' must be an integer but is: '17.23'
    >>> format._validatedCharacter("x", "Hugo")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: symbolic name 'Hugo' for data format property 'x' must be one of: 'cr', 'ff', 'lf', 'tab' or 'vt'
    >>> format._validatedCharacter("x", "( ")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '( '
    >>> format._validatedCharacter("x", "\"\\")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must a number, a single character or a symbolic name but is: '"\\'
    >>> format._validatedCharacter("x", "\"abc\"")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: text for data format property 'x' must be a single character but is: '"abc"'
    >>> format._validatedCharacter("x", "300")
    Traceback (most recent call last):
        ...
    DataFormatSyntaxError: value for data format property 'x' must describe a valid character code but is: '300'
    """
    # TODO: Consolidate code with `ranges.__init__()`.
    assert key
    assert value is not None
    if len(value) == 1 and (value < "0" or value > "9"):
        # Any single character except a digit is taken literally; single
        # digits are character codes and handled by the tokenizer below.
        result = value
    else:
        result = None
        tokens = tokenize.generate_tokens(StringIO.StringIO(value).readline)
        nextToken = tokens.next()
        if _tools.isEofToken(nextToken):
            raise DataFormatSyntaxError(u"value for data format property %r must be specified" % key)
        nextType = nextToken[0]
        nextValue = nextToken[1]
        if nextType == token.NUMBER:
            try:
                if nextValue[:2].lower() == "0x":
                    nextValue = nextValue[2:]
                    base = 16
                else:
                    base = 10
                longValue = long(nextValue, base)
            except ValueError:
                raise DataFormatSyntaxError(u"numeric value for data format property %r must be an integer but is: %r" % (key, value))
        elif nextType == token.NAME:
            try:
                longValue = tools.SYMBOLIC_NAMES_MAP[nextValue.lower()]
            except KeyError:
                validSymbols = _tools.humanReadableList(sorted(tools.SYMBOLIC_NAMES_MAP.keys()))
                raise DataFormatSyntaxError(u"symbolic name %r for data format property %r must be one of: %s" % (value, key, validSymbols))
        elif nextType == token.STRING:
            # The token has to be one character enclosed in quotes. Length
            # 3 alone does not guarantee that because a prefixed empty
            # string such as r"" is 3 characters long as well, so verify
            # the quotes explicitly rather than by assertion.
            leftQuote = nextValue[:1]
            rightQuote = nextValue[-1:]
            if (len(nextValue) != 3) or (leftQuote not in "\"\'") or (rightQuote not in "\"\'"):
                raise DataFormatSyntaxError(u"text for data format property %r must be a single character but is: %r" % (key, value))
            longValue = ord(nextValue[1])
        else:
            raise DataFormatSyntaxError(u"value for data format property %r must a number, a single character or a symbolic name but is: %r" % (key, value))
        # Ensure there are no further tokens.
        nextToken = tokens.next()
        if not _tools.isEofToken(nextToken):
            raise DataFormatSyntaxError(u"value for data format property %r must describe a single character but is: %r" % (key, value))

        assert longValue is not None
        assert longValue >= 0
        try:
            result = chr(longValue)
        except (ValueError, OverflowError):
            # Codes outside the range supported by `chr()` are a problem of
            # the value specified, not an internal error.
            raise DataFormatSyntaxError(u"value for data format property %r must describe a valid character code but is: %r" % (key, value))
    assert result is not None
    return result