def parse(self, message, characters=None):
    u"""Split an EDI message into segments.

    If the message starts with ``UNA``, the six characters that follow are
    taken unconditionally as the control characters: they are recorded as
    tokens, stored on ``self.characters`` and the header (plus any CR/LF
    characters directly after it) is removed before tokenizing.

    Without a UNA header, ``characters`` replaces ``self.characters`` when
    it is not ``None``; otherwise ``self.characters`` is left untouched.

    :param message: The EDI message
    :param characters: the control characters to use, if there is no UNA
        segment present
    :return: whatever ``convert_tokens_to_segments`` builds from the tokens
    """
    # FIXME: DRY: use get_control_characters here?
    collected = []

    if message[0:3] == u'UNA':
        # The service string advice is always exactly six characters long.
        advice = message[3:9]
        collected.extend([
            Token(Token.Type.CONTENT, u'UNA'),
            Token(Token.Type.CTRL_CHARS, advice),
        ])
        # Drop the UNA header and the line break that may follow it.
        message = message[9:].lstrip(u"\r\n")
        self.characters = Characters.from_str(u'UNA' + advice)
    elif characters is not None:
        # No UNA header: fall back to the caller-supplied characters.
        self.characters = characters

    collected.extend(Tokenizer().get_tokens(message, self.characters))
    return self.convert_tokens_to_segments(collected, self.characters)
def test_quadruple_escape(self):
    # Four consecutive "?" are expected to come out as the content "PD??",
    # with the ":" that follows still tokenized as a component separator.
    kind = Token.Type
    expected_tokens = [
        Token(kind.CONTENT, u"RFF"),
        Token(kind.DATA_SEPARATOR, u"+"),
        Token(kind.CONTENT, u"PD??"),
        Token(kind.COMPONENT_SEPARATOR, u":"),
        Token(kind.CONTENT, u"5"),
    ]
    self._assert_tokens(u"RFF+PD????:5", expected_tokens)
def test_basic(self):
    # A plain segment with one data separator and one component separator.
    kind = Token.Type
    expected_tokens = [
        Token(kind.CONTENT, u"RFF"),
        Token(kind.DATA_SEPARATOR, u"+"),
        Token(kind.CONTENT, u"PD"),
        Token(kind.COMPONENT_SEPARATOR, u":"),
        Token(kind.CONTENT, u"50515"),
    ]
    self._assert_tokens(u"RFF+PD:50515", expected_tokens)
def _assert_tokens(self, message, expected=None):
    u"""Tokenize ``message`` plus a trailing ``'`` and compare the result.

    The message is tokenized with default ``Characters()``. A TERMINATOR
    token for the appended ``'`` is added to the expected tokens before the
    comparison, so callers only list the tokens of the message itself.

    :param message: the message to tokenize, without its final terminator
    :param expected: the tokens expected for ``message``; ``None`` means
        no tokens besides the terminator. The list is not modified.
    """
    terminated_message = u"{}'".format(message)
    tokens = self._tokenizer.get_tokens(terminated_message, Characters())

    # Work on a copy so the caller's list is never mutated by the helper.
    expected_tokens = [] if expected is None else list(expected)
    expected_tokens.append(Token(Token.Type.TERMINATOR, u"'"))

    self.assertEqual(expected_tokens, tokens)
def test_ignore_whitespace(self):
    # The line break after the first segment terminator must not show up
    # in the token stream.
    kind = Token.Type
    expected_tokens = [
        Token(kind.CONTENT, u"RFF"),
        Token(kind.COMPONENT_SEPARATOR, u":"),
        Token(kind.CONTENT, u"5"),
        Token(kind.TERMINATOR, u"'"),
        Token(kind.CONTENT, u"DEF"),
        Token(kind.COMPONENT_SEPARATOR, u":"),
        Token(kind.CONTENT, u"6"),
    ]
    self._assert_tokens(u"RFF:5'\nDEF:6", expected_tokens)
def get_next_token(self):
    u"""Return the next token of the message, or ``None`` at its end.

    An unescaped component separator, data separator or segment terminator
    is returned as a token of the matching type. After a segment
    terminator, any directly following CR/LF characters are skipped.
    Anything else is accumulated into a CONTENT token until
    ``is_control_character()`` reports a control character.

    :raises RuntimeError: if the message ends while content is being read
    """
    if self.end_of_message():
        return None

    # The escape state is sampled once; an escaped character is never
    # treated as a control character and falls through to content.
    escaped = self.isEscaped

    if not escaped and self._char == self.characters.component_separator:
        self.store_current_char_and_read_next()
        return Token(Token.Type.COMPONENT_SEPARATOR,
                     self.extract_stored_chars())

    if not escaped and self._char == self.characters.data_separator:
        self.store_current_char_and_read_next()
        return Token(Token.Type.DATA_SEPARATOR,
                     self.extract_stored_chars())

    if not escaped and self._char == self.characters.segment_terminator:
        self.store_current_char_and_read_next()
        terminator = Token(Token.Type.TERMINATOR,
                           self.extract_stored_chars())
        # Ignore any line breaks trailing the end of the segment
        while self._char in (u"\r", u"\n"):
            self.read_next_char()
        return terminator

    # Plain content: consume characters up to the next control character.
    while True:
        if self.is_control_character():
            break
        if self.end_of_message():
            raise RuntimeError(u"Unexpected end of EDI message")
        self.store_current_char_and_read_next()

    return Token(Token.Type.CONTENT, self.extract_stored_chars())
def test_triple_escape(self):
    # Three consecutive "?" are expected to yield a literal "?" followed
    # by an escaped ":", so everything after "+" is one content token.
    kind = Token.Type
    expected_tokens = [
        Token(kind.CONTENT, u"RFF"),
        Token(kind.DATA_SEPARATOR, u"+"),
        Token(kind.CONTENT, u"PD?:5"),
    ]
    self._assert_tokens(u"RFF+PD???:5", expected_tokens)
def test_value(self):
    # The second constructor argument is exposed through ``value``.
    content = u"ok"
    subject = Token(Token.Type.CONTENT, content)
    self.assertEqual(u"ok", subject.value)
def test_type(self):
    # The first constructor argument is exposed through ``type``.
    wanted = Token.Type.CONTENT
    subject = Token(wanted, u"ok")
    self.assertEqual(Token.Type.CONTENT, subject.type)