def test_preserves_whitespace_in_quotes(self):
    """Whitespace inside a quoted token must survive tokenization untouched."""
    cases = [
        (['"foo \t\nbar"'], '"foo \t\nbar"'),
        (['"foo bar"', 'baz'], '"foo bar" baz'),
        (['"foo bar"', '"baz qux"'], '"foo bar" "baz qux"'),
        (["'foo \t\nbar'"], "'foo \t\nbar'"),
        (["'foo bar'", "baz"], "'foo bar' baz"),
        (["'foo bar'", "'baz qux'"], "'foo bar' 'baz qux'"),
    ]
    for expected, source in cases:
        self.assertEqual(expected, tokenize(source))
def parse(text: str) -> Optional['Statement']:
    """Parse one line of source text into a Statement.

    Returns None for blank lines and comment-only lines (leading ';').

    Raises:
        ValueError: if the tokens match no known statement form.
    """
    text = text.strip()
    tokens = tokenize(text)
    if not tokens:
        return None
    # 'name:' -> a label definition.
    if matches(tokens, [Match.identifier, ':']):
        return Label(tokens[0])
    # 'name = <expr>' -> a constant definition; everything after '=' is the expression.
    if matches(tokens[:2], [Match.identifier, '=']):
        return ConstantDefinition(tokens[0], Expression.build(tokens[2:]))
    # A bare identifier starts either a data directive or an instruction.
    if matches(tokens[:1], [Match.identifier]):
        if tokens[0] in Data.DATATYPES:
            return Data.build(tokens)
        else:
            return Instruction.build(tokens)
    # A line beginning with ';' is a comment.
    if matches(tokens[:1], [';']):
        return None
    # BUG FIX: the original passed logging-style args -- ValueError('...%s', text) --
    # so '%s' was never interpolated and the exception carried a 2-tuple instead of
    # a formatted message. Format explicitly.
    raise ValueError('unable to parse statement: %s' % text)
def test_strips_whitespace(self):
    """Leading and trailing whitespace around a token is discarded."""
    for source in ('foo', ' foo', 'foo ', ' foo ', '\tfoo\n'):
        self.assertEqual(['foo'], tokenize(source))
def test_bitshift(self):
    """Shift operators are taken greedily, two characters at a time."""
    cases = [
        (['<<'], '<<'),
        (['>>'], '>>'),
        (['<<', '<'], '<<<'),
        (['>>', '>>', '>'], '>>>>>'),
    ]
    for expected, source in cases:
        self.assertEqual(expected, tokenize(source))
def test_number(self):
    """A bare numeric literal becomes a single token."""
    source = '42'
    self.assertEqual(['42'], tokenize(source))
def test_separates_punctuation(self):
    """Punctuation characters split off into their own tokens."""
    cases = [
        (['foo', ',', 'bar'], 'foo, bar'),
        (['(', 'foo', 'bar', ')'], '(foo bar)'),
        (['foo', '-', '>', '*', 'bar'], 'foo->*bar'),
    ]
    for expected, source in cases:
        self.assertEqual(expected, tokenize(source))
def test_allows_unclosed_quotes(self):
    """An unterminated quote swallows the rest of the input as one token."""
    for expected, source in [
        (['"foo bar'], '"foo bar'),
        (["'foo bar"], "'foo bar"),
    ]:
        self.assertEqual(expected, tokenize(source))
def test_escapes_quotes_with_backslash(self):
    """A backslash-escaped quote does not terminate the quoted token."""
    double = r'"foo bar\" \"baz qux"'
    self.assertEqual([double], tokenize(double))
    single = r"'foo bar\' \'baz qux'"
    self.assertEqual([single], tokenize(single))
def test_splits_on_whitespace(self):
    """Any run of whitespace between tokens acts as a separator."""
    # NOTE(review): the first two inputs are textually identical here -- the
    # second was presumably meant to use multiple spaces ('foo  bar') and may
    # have been collapsed by whitespace mangling; confirm against history.
    for source in ('foo bar', 'foo bar', 'foo \t \nbar'):
        self.assertEqual(['foo', 'bar'], tokenize(source))