def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """Parse a run of consecutive empty lines beginning at ``state``.

    Empty lines are consumed one at a time with ``_parse_empty_line`` until it
    reports that the next line is not an empty line.

    When ``override_absolute_indent`` is given, only the lines up to and
    including the last one indented at our level are kept: those belong to the
    footer rather than to the next line's ``leading_lines``. Any unindented
    lines that come after the last indented line do not belong to this node
    and are dropped.

    Args:
        config: Parser configuration, forwarded to ``_parse_empty_line``.
        state: Current position. Its ``line`` and ``column`` are advanced in
            place past the last line that is kept; it is left untouched when
            no line is kept.
        override_absolute_indent: Optional indent, forwarded to
            ``_parse_empty_line``; when not ``None`` it also enables the
            trimming described above.

    Returns:
        The kept ``EmptyLine`` nodes, in source order.
    """

    def clone_at(position: State) -> State:
        # Line/column come from `position`; the indent and parenthesization
        # always come from the caller's original state.
        return State(
            position.line,
            position.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    # Each entry pairs the state *after* parsing a line with the parsed node, so
    # that we can rewind to any earlier line if trailing entries get discarded.
    parsed: List[Tuple[State, EmptyLine]] = []
    cursor = clone_at(state)
    while True:
        empty_line = _parse_empty_line(
            config, cursor, override_absolute_indent=override_absolute_indent
        )
        if empty_line is None:
            break
        parsed.append((cursor, empty_line))
        # Fresh clone for the next attempt; the stored one must stay frozen.
        cursor = clone_at(cursor)

    if override_absolute_indent is not None:
        # Drop trailing lines until the last one is indented. If no line is
        # indented at all, this discards every line.
        while parsed and not parsed[-1][1].indent:
            parsed.pop()

    if parsed:
        # Move the caller's state to just after the last line actually kept.
        last_state, _ = parsed[-1]
        state.line = last_state.line
        state.column = last_state.column
    return [node for _, node in parsed]
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """Speculatively parse a single empty line at ``state``.

    The attempt runs against a scratch copy of ``state``. Only when it
    succeeds are ``state.line`` and ``state.column`` updated in place.

    Args:
        config: Parser configuration, forwarded to the helper parsers.
        state: Current position; modified only on success.
        override_absolute_indent: Optional indent, forwarded to
            ``_parse_indent``.

    Returns:
        An ``EmptyLine`` built from the parsed indent, whitespace, comment and
        newline, or ``None`` if ``_parse_indent`` raised or no newline was
        found.
    """
    # Scratch copy: a failed attempt must leave the caller's state untouched.
    attempt = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )

    try:
        indent = _parse_indent(
            config, attempt, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # We aren't on a new line, so the speculative parse failed.
        return None

    whitespace = parse_simple_whitespace(config, attempt)
    comment = _parse_comment(config, attempt)
    newline = _parse_newline(config, attempt)

    if newline is not None:
        # Success: commit the position. absolute_indent / is_parenthesized do
        # not change during parsing, so they need no copying back.
        state.line = attempt.line
        state.column = attempt.column
        return EmptyLine(indent, whitespace, comment, newline)

    # No newline where one was required: the speculative parse failed.
    return None
def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """Speculatively parse trailing whitespace at ``state``.

    Whitespace, a comment and a newline are parsed in that order against a
    scratch copy of ``state``. Only when a newline is found are ``state.line``
    and ``state.column`` updated in place.

    Args:
        config: Parser configuration, forwarded to the helper parsers.
        state: Current position; modified only on success.

    Returns:
        A ``TrailingWhitespace`` built from the parsed whitespace, comment and
        newline, or ``None`` if no newline was found.
    """
    # Scratch copy: a failed attempt must leave the caller's state untouched.
    attempt = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )

    whitespace = parse_simple_whitespace(config, attempt)
    comment = _parse_comment(config, attempt)
    newline = _parse_newline(config, attempt)

    if newline is not None:
        # Success: commit the position. absolute_indent / is_parenthesized do
        # not change during parsing, so they need no copying back.
        state.line = attempt.line
        state.column = attempt.column
        return TrailingWhitespace(whitespace, comment, newline)

    # No newline where one was required: the speculative parse failed.
    return None
class WhitespaceParserTest(UnitTest):
    """Table-driven tests for the whitespace parsing functions.

    Every case names a parser, the source lines to parse (``config``), the
    parser state before the call (``start_state``), the state expected after
    the call (``end_state``) and the node(s) the parser is expected to return
    (``expected_node``).
    """

    # Case keys match the keyword parameters of `test_parsers`; presumably
    # `data_provider` expands each entry into one invocation — confirm against
    # its definition.
    #
    # NOTE(review): as rendered here, several string literals look
    # whitespace-collapsed relative to the columns they are paired with, e.g.
    # "\t " is two characters but its end_state column is 3, and "prefix " is
    # seven characters but its end_state column is 9. Confirm the literals
    # against the expected end_state values before relying on these cases.
    @data_provider(
        {
            "simple_whitespace_empty": {
                "parser": parse_simple_whitespace,
                "config": Config(
                    lines=["not whitespace\n", " another line\n"], default_newline="\n"
                ),
                "start_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": cst.SimpleWhitespace(""),
            },
            "simple_whitespace_start_of_line": {
                "parser": parse_simple_whitespace,
                "config": Config(
                    lines=["\t <-- There's some whitespace there\n"],
                    default_newline="\n",
                ),
                "start_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=1, column=3, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": cst.SimpleWhitespace("\t "),
            },
            "simple_whitespace_end_of_line": {
                "parser": parse_simple_whitespace,
                "config": Config(lines=["prefix "], default_newline="\n"),
                "start_state": State(
                    line=1, column=6, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=1, column=9, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": cst.SimpleWhitespace(" "),
            },
            "simple_whitespace_line_continuation": {
                "parser": parse_simple_whitespace,
                "config": Config(
                    lines=["prefix \\\n", " \\\n", " # suffix\n"],
                    default_newline="\n",
                ),
                "start_state": State(
                    line=1, column=6, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=3, column=4, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": cst.SimpleWhitespace(" \\\n \\\n "),
            },
            "empty_lines_empty_list": {
                "parser": parse_empty_lines,
                "config": Config(
                    lines=["this is not an empty line"], default_newline="\n"
                ),
                "start_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": [],
            },
            "empty_lines_single_line": {
                "parser": parse_empty_lines,
                "config": Config(
                    lines=[" # comment\n", "this is not an empty line\n"],
                    default_newline="\n",
                ),
                "start_state": State(
                    line=1, column=0, absolute_indent=" ", is_parenthesized=False
                ),
                "end_state": State(
                    line=2, column=0, absolute_indent=" ", is_parenthesized=False
                ),
                "expected_node": [
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=cst.Comment("# comment"),
                        newline=cst.Newline(),
                    )
                ],
            },
            "empty_lines_multiple": {
                "parser": parse_empty_lines,
                "config": Config(
                    lines=[
                        "\n",
                        " \n",
                        " # comment with indent and whitespace\n",
                        "# comment without indent\n",
                        " # comment with no indent but some whitespace\n",
                    ],
                    default_newline="\n",
                ),
                "start_state": State(
                    line=1, column=0, absolute_indent=" ", is_parenthesized=False
                ),
                "end_state": State(
                    line=5, column=47, absolute_indent=" ", is_parenthesized=False
                ),
                "expected_node": [
                    cst.EmptyLine(
                        indent=False,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=None,
                        newline=cst.Newline(),
                    ),
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=None,
                        newline=cst.Newline(),
                    ),
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(" "),
                        comment=cst.Comment("# comment with indent and whitespace"),
                        newline=cst.Newline(),
                    ),
                    cst.EmptyLine(
                        indent=False,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=cst.Comment("# comment without indent"),
                        newline=cst.Newline(),
                    ),
                    cst.EmptyLine(
                        indent=False,
                        whitespace=cst.SimpleWhitespace(" "),
                        comment=cst.Comment(
                            "# comment with no indent but some whitespace"
                        ),
                        newline=cst.Newline(),
                    ),
                ],
            },
            "empty_lines_non_default_newline": {
                "parser": parse_empty_lines,
                "config": Config(lines=["\n", "\r\n", "\r"], default_newline="\n"),
                "start_state": State(
                    line=1, column=0, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=3, column=1, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": [
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=None,
                        newline=cst.Newline(None),  # default newline
                    ),
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=None,
                        newline=cst.Newline("\r\n"),  # non-default
                    ),
                    cst.EmptyLine(
                        indent=True,
                        whitespace=cst.SimpleWhitespace(""),
                        comment=None,
                        newline=cst.Newline("\r"),  # non-default
                    ),
                ],
            },
            "trailing_whitespace": {
                "parser": parse_trailing_whitespace,
                "config": Config(
                    lines=["some code # comment\n"], default_newline="\n"
                ),
                "start_state": State(
                    line=1, column=9, absolute_indent="", is_parenthesized=False
                ),
                "end_state": State(
                    line=1, column=21, absolute_indent="", is_parenthesized=False
                ),
                "expected_node": cst.TrailingWhitespace(
                    whitespace=cst.SimpleWhitespace(" "),
                    comment=cst.Comment("# comment"),
                    newline=cst.Newline(),
                ),
            },
        }
    )
    def test_parsers(
        self,
        parser: Callable[[Config, State], _T],
        config: Config,
        start_state: State,
        end_state: State,
        expected_node: _T,
    ) -> None:
        # Runs `parser` on `config` starting at `start_state`, then checks both
        # the returned node(s) and the state left behind by the parser.
        #
        # Uses internal `deep_equals` function instead of `CSTNode.deep_equals`, because
        # we need to compare sequences of nodes, and this is the easiest way. :/
        parsed_node = parser(config, start_state)
        self.assertTrue(
            deep_equals(parsed_node, expected_node),
            msg=f"\n{parsed_node!r}\nis not deeply equal to \n{expected_node!r}",
        )
        # `start_state` is the same object that was handed to the parser, so
        # comparing it with `end_state` checks the parser's in-place updates.
        self.assertEqual(start_state, end_state)