def parse(self, message, characters=None):
    u"""Turn an EDI message string into segments.

    When the message begins with ``UNA``, the six characters that follow
    are taken unconditionally as the control characters: they are stored
    on ``self.characters`` and emitted as leading tokens, and the header
    (plus any line breaks right after it) is removed before tokenizing.

    Without a ``UNA`` header, ``characters`` replaces ``self.characters``
    when it is not ``None``; otherwise ``self.characters`` is left as is.

    :param message: The EDI message
    :param characters: the control characters to use, if there is no
        UNA segment present
    :return: whatever ``convert_tokens_to_segments`` produces for the
        collected tokens
    """
    # FIXME: DRY: use get_control_characters here?
    if message[0:3] == u'UNA':
        una_chars = message[3:9]
        collected = [
            Token(Token.Type.CONTENT, u'UNA'),
            Token(Token.Type.CTRL_CHARS, una_chars),
        ]
        # Drop the UNA header and the line breaks directly behind it.
        message = message[9:].lstrip(u"\r\n")
        self.characters = Characters.from_str(u'UNA' + una_chars)
    else:
        collected = []
        # No UNA header: only an explicitly passed set overrides the
        # control characters currently stored on the parser.
        if characters is not None:
            self.characters = characters

    collected.extend(Tokenizer().get_tokens(message, self.characters))
    return self.convert_tokens_to_segments(collected, self.characters)
def test_quadruple_escape(self):
    """Four consecutive '?' become a literal '??'; the ':' after them still separates."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD??"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "5"),
    ]
    self._assert_tokens("RFF+PD????:5", expected)
def test_basic(self):
    """A plain segment is split at the '+' and ':' separators."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "50515"),
    ]
    self._assert_tokens("RFF+PD:50515", expected)
def test_triple_escape():
    """'???:' yields a literal '?' and a literal ':', so 'PD?:5' is one content token."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD?:5"),
        Token(kind.TERMINATOR, "'"),
    ]
    _assert_tokens("RFF+PD???:5'", expected)
def test_starts_with_escape():
    """A data element that starts with '?+' keeps the '+' as content."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "DTM"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "+0"),
        Token(kind.TERMINATOR, "'"),
    ]
    _assert_tokens("DTM+?+0'", expected)
def expected_crlf():
    """Return the tokens for the segments ``RFF:5'`` and ``DEF:6'``.

    A new list of new tokens is built on every call.
    NOTE(review): presumably shared by the line-break tests; any
    decorator on this function is not visible here.
    """
    kind = Token.Type
    layout = (
        (kind.CONTENT, "RFF"),
        (kind.COMPONENT_SEPARATOR, ":"),
        (kind.CONTENT, "5"),
        (kind.TERMINATOR, "'"),
        (kind.CONTENT, "DEF"),
        (kind.COMPONENT_SEPARATOR, ":"),
        (kind.CONTENT, "6"),
        (kind.TERMINATOR, "'"),
    )
    return [Token(token_kind, text) for token_kind, text in layout]
def test_ignore_whitespace(self):
    """A newline directly after a segment terminator produces no token."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "5"),
        Token(kind.TERMINATOR, "'"),
        Token(kind.CONTENT, "DEF"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "6"),
    ]
    self._assert_tokens("RFF:5'\nDEF:6", expected)
def get_next_token(self) -> Optional[Token]:
    """Read one token starting at the current character.

    An unescaped character that ``token_selector`` maps to a token type
    becomes a token of that type. After a terminator token, any
    characters listed in ``characters.line_terminators`` are skipped.
    Anything else is collected, up to the next control character, into
    a single content token.

    NOTE(review): no path in this function returns ``None`` even though
    the annotation allows it; end-of-message handling appears to be left
    to the caller -- confirm.

    :raises RuntimeError: if the message ends while content is still
        being collected.
    """
    # An escaped character is never looked up as a control character.
    if not self.isEscaped:
        kind = self.token_selector.get(self._char)
        if kind:
            self.store_current_char_and_read_next()
            control_token = Token(kind, self.extract_stored_chars())
            if kind == Token.Type.TERMINATOR:
                # Swallow the line terminators that follow a segment.
                while self._char in self.characters.line_terminators:
                    self.read_next_char()
            return control_token

    # Plain content: accumulate until a control character shows up.
    while True:
        if self.is_control_character():
            break
        if self.end_of_message():
            raise RuntimeError("Unexpected end of EDI message")
        self.store_current_char_and_read_next()

    return Token(Token.Type.CONTENT, self.extract_stored_chars())
def get_next_token(self) -> "Token | None":
    """Get the next token from the message.

    The return annotation is quoted: the previous ``Token or None`` was
    evaluated at definition time and collapsed to plain ``Token``, which
    hid the ``None`` return from type checkers.

    :return: ``None`` when the end of the message has been reached,
        otherwise a separator, terminator or content token.
    :raises RuntimeError: if the message ends while content is still
        being collected.
    """
    if self.end_of_message():
        return None

    # If we're not escaping this character then see if it's
    # a control character
    if not self.isEscaped:
        # Checked in this order, matching the precedence of the original
        # if-chain should two control characters ever be configured equal.
        control_tokens = (
            (self.characters.component_separator, Token.Type.COMPONENT_SEPARATOR),
            (self.characters.data_separator, Token.Type.DATA_SEPARATOR),
            (self.characters.segment_terminator, Token.Type.TERMINATOR),
        )
        for control_char, token_type in control_tokens:
            if self._char != control_char:
                continue
            self.store_current_char_and_read_next()
            token = Token(token_type, self.extract_stored_chars())
            if token_type == Token.Type.TERMINATOR:
                # Ignore any line breaks after the end of the segment
                while self._char in ("\r", "\n"):
                    self.read_next_char()
            return token

    # Everything up to the next control character is one content token.
    while not self.is_control_character():
        if self.end_of_message():
            raise RuntimeError("Unexpected end of EDI message")
        self.store_current_char_and_read_next()

    return Token(Token.Type.CONTENT, self.extract_stored_chars())
def test_quadruple_escape():
    """Four consecutive '?' become a literal '??'; the ':' after them still separates."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD??"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "5"),
        Token(kind.TERMINATOR, "'"),
    ]
    _assert_tokens("RFF+PD????:5'", expected)
def test_basic():
    """A plain segment is split at '+' and ':' and ends with a terminator token."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD"),
        Token(kind.COMPONENT_SEPARATOR, ":"),
        Token(kind.CONTENT, "50515"),
        Token(kind.TERMINATOR, "'"),
    ]
    _assert_tokens("RFF+PD:50515'", expected)
def test_triple_escape(self):
    """'???:' yields a literal '?' and a literal ':', so 'PD?:5' is one content token."""
    kind = Token.Type
    expected = [
        Token(kind.CONTENT, "RFF"),
        Token(kind.DATA_SEPARATOR, "+"),
        Token(kind.CONTENT, "PD?:5"),
    ]
    self._assert_tokens("RFF+PD???:5", expected)
def _assert_tokens(self, message, expected=None):
    """Tokenize ``message`` with a segment terminator appended and compare.

    A ``'`` is appended to ``message`` before it is tokenized with
    default ``Characters()``, and a matching terminator token is added
    to the expected tokens, so callers list only the tokens in front of
    the final terminator.

    :param message: the segment text, without the closing ``'``
    :param expected: the tokens expected before the final terminator;
        ``None`` means none. The caller's list is not modified.
    """
    tokens = self._tokenizer.get_tokens("{}'".format(message), Characters())

    # Copy instead of appending in place: the original mutated the list
    # object passed in by the caller.
    expected_tokens = list(expected) if expected is not None else []
    expected_tokens.append(Token(Token.Type.TERMINATOR, "'"))

    self.assertEqual(expected_tokens, tokens)
def test_value(self):
    """``value`` returns the text the token was constructed with."""
    self.assertEqual("ok", Token(Token.Type.CONTENT, "ok").value)
def test_type(self):
    """``type`` returns the token type the token was constructed with."""
    self.assertEqual(Token.Type.CONTENT, Token(Token.Type.CONTENT, "ok").type)