def test_abstraction_returns_expected(self, source, expected_starts_and_kinds):
  """Checks start positions and token kinds of abstracted tokens."""
  agnostic_tokens = java_tokenizer.JavaTokenizer().tokenize_and_abstract(source)
  # Skip the trailing sentinel token when collecting observed values.
  observed = []
  for token in agnostic_tokens[:-1]:
    start = token.metadata.start
    observed.append((start.line, start.column, token.kind))
  self.assertSequenceEqual(expected_starts_and_kinds, tuple(observed))
def test_tokenization_returns_expected_newlines(self, source, expected_newline_lines):
  """Checks the source lines on which NEWLINE multi-tokens are reported."""
  tokenizer = java_tokenizer.JavaTokenizer()
  # Run the pipeline up to (but not including) flattening, dropping the
  # trailing sentinel multi-token.
  conditioned = tokenizer.condition_full_tokens(
      tokenizer.tokenize_and_abstract(source))
  multi_tokens = tokenizer.subtokenize_full_tokens(conditioned)[:-1]
  observed_newline_lines = tuple(
      token.metadata.start.line
      for token in multi_tokens
      if token.kind == unified_tokenizer.TokenKind.NEWLINE)
  self.assertSequenceEqual(expected_newline_lines, observed_newline_lines)
def test_tokenization_returns_expected_positions(self, source, expected_lines, expected_columns):
  """Checks the (line, column) start positions of all multi-tokens."""
  tokenizer = java_tokenizer.JavaTokenizer()
  # Run the pipeline up to (but not including) flattening, dropping the
  # trailing sentinel multi-token.
  conditioned = tokenizer.condition_full_tokens(
      tokenizer.tokenize_and_abstract(source))
  multi_tokens = tokenizer.subtokenize_full_tokens(conditioned)[:-1]
  observed_positions = tuple(
      (token.metadata.start.line, token.metadata.start.column)
      for token in multi_tokens)
  # Pair up the parallel expectation lists into (line, column) tuples.
  expected_positions = tuple(zip(expected_lines, expected_columns))
  self.assertSequenceEqual(expected_positions, observed_positions)
def test_tokenize_returns_expected(self, source, expected):
  """Checks the end-to-end token sequence produced by `tokenize`."""
  observed = java_tokenizer.JavaTokenizer().tokenize(source)
  self.assertSequenceEqual(expected, observed)