def parse(
    self, message: str, characters: Optional[Characters] = None
) -> Generator[Segment, Any, None]:
    """Parse the message into a list of segments.

    :param message: The EDI message
    :param characters: the control characters to use if there is no
        UNA segment present
    :rtype: Generator[Segment, Any, None]
    """

    # If there is a UNA segment, take the following 6 characters
    # unconditionally, save them, strip them, and build control
    # Characters() from them for further parsing
    if message[0:3] == "UNA":
        self.characters = Characters.from_str("UNA" + message[3:9])

        # remove the UNA segment from the string
        message = message[9:].lstrip("\r\n")

    else:
        # if no UNA header is present, use the given control characters;
        # otherwise keep self.characters as already initialised
        if characters is not None:
            self.characters = characters

    tokenizer = Tokenizer()
    return self.convert_tokens_to_segments(
        tokenizer.get_tokens(message, self.characters), self.characters
    )
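A minimal usage sketch for the method above, assuming a Parser class that exposes it and that Segment carries tag and elements attributes (both assumptions; the sample message and its UNA service string advice are illustrative only):

# Sketch only: parse a small EDIFACT message and print each segment.
parser = Parser()
message = "UNA:+.? 'UNH+1+ORDERS:D:96A:UN'RFF+PD:50515'UNT+3+1'"
for segment in parser.parse(message):
    print(segment.tag, segment.elements)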
def parse(self, message, characters=None):
    u"""Parse the message into a list of segments.

    :param message: The EDI message
    :param characters: the control characters to use if there is no
        UNA segment present
    :rtype: list of Segment
    """

    # FIXME: DRY: use get_control_characters here?

    tokens = []

    # If there is a UNA token, take the following 6 characters
    # unconditionally, save them as a token, and use them as control
    # characters for further parsing
    if message[0:3] == u'UNA':
        control_chars = message[3:9]
        tokens.append(Token(Token.Type.CONTENT, u'UNA'))
        tokens.append(Token(Token.Type.CTRL_CHARS, control_chars))

        # remove the UNA segment from the string
        message = message[9:].lstrip(u"\r\n")
        self.characters = Characters.from_str(u'UNA' + control_chars)

    else:
        # if no UNA header is present, use the given control characters
        if characters is not None:
            self.characters = characters

    tokenizer = Tokenizer()
    tokens += tokenizer.get_tokens(message, self.characters)

    segments = self.convert_tokens_to_segments(tokens, self.characters)
    return segments
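The FIXME above suggests factoring the UNA handling out into a helper. A possible shape, sketched only from that comment (the name get_control_characters comes from the FIXME itself; the signature and the fallback behaviour are assumptions):

def get_control_characters(self, message, characters=None):
    # Hypothetical helper, not part of the original code: read the UNA
    # service string advice if present, otherwise fall back to the given
    # (or default) control characters.
    if message[0:3] == u'UNA':
        return Characters.from_str(u'UNA' + message[3:9])
    return characters if characters is not None else Characters()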
import unittest

# Tokenizer, Token and Characters come from the tokenizer module under
# test; the exact import path depends on the package layout.


class TokenizerTest(unittest.TestCase):

    def setUp(self):
        self._tokenizer = Tokenizer()

    def _assert_tokens(self, message, expected=None):
        """Tokenize the message (with a terminator appended) and compare
        the result against the expected token list."""
        if expected is None:
            expected = []

        tokens = self._tokenizer.get_tokens("{}'".format(message), Characters())
        expected.append(Token(Token.Type.TERMINATOR, "'"))
        self.assertEqual(expected, tokens)

    def test_basic(self):
        self._assert_tokens("RFF+PD:50515", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.DATA_SEPARATOR, "+"),
            Token(Token.Type.CONTENT, "PD"),
            Token(Token.Type.COMPONENT_SEPARATOR, ":"),
            Token(Token.Type.CONTENT, "50515"),
        ])

    def test_escape(self):
        self._assert_tokens("RFF+PD?:5", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.DATA_SEPARATOR, "+"),
            Token(Token.Type.CONTENT, "PD:5"),
        ])

    def test_double_escape(self):
        self._assert_tokens("RFF+PD??:5", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.DATA_SEPARATOR, "+"),
            Token(Token.Type.CONTENT, "PD?"),
            Token(Token.Type.COMPONENT_SEPARATOR, ":"),
            Token(Token.Type.CONTENT, "5"),
        ])

    def test_triple_escape(self):
        self._assert_tokens("RFF+PD???:5", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.DATA_SEPARATOR, "+"),
            Token(Token.Type.CONTENT, "PD?:5"),
        ])

    def test_quadruple_escape(self):
        self._assert_tokens("RFF+PD????:5", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.DATA_SEPARATOR, "+"),
            Token(Token.Type.CONTENT, "PD??"),
            Token(Token.Type.COMPONENT_SEPARATOR, ":"),
            Token(Token.Type.CONTENT, "5"),
        ])

    def test_ignore_whitespace(self):
        self._assert_tokens("RFF:5'\nDEF:6", [
            Token(Token.Type.CONTENT, "RFF"),
            Token(Token.Type.COMPONENT_SEPARATOR, ":"),
            Token(Token.Type.CONTENT, "5"),
            Token(Token.Type.TERMINATOR, "'"),
            Token(Token.Type.CONTENT, "DEF"),
            Token(Token.Type.COMPONENT_SEPARATOR, ":"),
            Token(Token.Type.CONTENT, "6"),
        ])

    def test_no_terminator(self):
        with self.assertRaises(RuntimeError) as cm:
            self._tokenizer.get_tokens("TEST", Characters())

        self.assertEqual(str(cm.exception), "Unexpected end of EDI message")
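For reference, the release-character behaviour these tests assert can be reproduced directly. A minimal sketch, assuming Token exposes type and value attributes (names inferred from the constructor calls above, not confirmed by the source):

# Sketch only: "?" escapes the following ":" so "PD:5" stays a single
# content token, matching test_escape; "??" would collapse to a literal
# "?" and let ":" separate again, matching test_double_escape.
tokenizer = Tokenizer()
for token in tokenizer.get_tokens("RFF+PD?:5'", Characters()):
    print(token.type, repr(token.value))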