def test__parser__common_marker():
    """Test construction and comparison of markers."""
    # Build a chain of markers by advancing through successive strings.
    fp1 = FilePositionMarker.from_fresh()
    fp2 = fp1.advance_by("abc")
    fp3 = fp2.advance_by("def\nghi\njlk")
    fp4 = fp3.advance_by("mno", idx=1)
    markers = [fp1, fp2, fp3, fp4]
    # Expected values: fresh start, then advance without a newline,
    # across newlines, and with an idx bump respectively.
    expected = [
        FilePositionMarker(1, 1, 1, 0),
        FilePositionMarker(1, 1, 4, 3),
        FilePositionMarker(1, 3, 4, 14),
        FilePositionMarker(2, 3, 7, 17),
    ]
    for got, want in zip(markers, expected):
        assert got == want
    # Markers should sort in construction order, in both directions.
    for earlier, later in zip(markers, markers[1:]):
        assert earlier < later
        assert later > earlier
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but good enough
    for testing. Use with caution.
    """

    def _class_for(elem):
        # Choose a RawSegment subclass based on the element content.
        # Order matters: whitespace/newline checks come before the
        # prefix-based checks.
        if set(elem) <= {" ", "\t"}:
            return RawSegment.make(" ", name="whitespace", type="whitespace")
        if set(elem) <= {"\n"}:
            return RawSegment.make("\n", name="newline", type="newline")
        if elem == "(":
            return RawSegment.make("(", name="bracket_open", _is_code=True)
        if elem == ")":
            return RawSegment.make(")", name="bracket_close", _is_code=True)
        if elem.startswith("--"):
            return RawSegment.make("--", name="inline_comment")
        if elem.startswith('"'):
            return RawSegment.make('"', name="double_quote", _is_code=True)
        if elem.startswith("'"):
            return RawSegment.make("'", name="single_quote", _is_code=True)
        return RawSegment.make("", _is_code=True)

    buff = []
    raw_buff = ""
    for elem in elems:
        seg_cls = _class_for(elem)
        # Position each segment by advancing a fresh marker over
        # everything already consumed.
        buff.append(
            seg_cls(elem, FilePositionMarker.from_fresh().advance_by(raw_buff))
        )
        raw_buff += elem
    # Callers rely on getting a tuple back.
    return tuple(buff)
def test__cli__formatters__violation():
    """Test formatting violations.

    NB Position is 1 + start_pos.
    """
    segment = RawSegment("foobarbar", FilePositionMarker(0, 20, 11, 100))
    rule = RuleGhost("A", "DESC")
    violation = SQLLintError(segment=segment, rule=rule)
    formatted = format_violation(violation)
    # Strip any ANSI colouring before comparing the rendered text.
    assert escape_ansi(formatted) == "L: 20 | P: 11 | A | DESC"
def test__markers__enriched_marker_format():
    """Test formatting of enriched markers."""
    marker = EnrichedFilePositionMarker(
        1,
        1,
        1,
        0,
        slice(0, 1),
        slice(0, 1),
        True,
        source_pos_marker=FilePositionMarker(1, 2, 3, 0),
    )
    # The rendered form reflects the source position marker (1, 2, 3, 0)
    # rather than the templated position fields.
    assert str(marker) == "[0](1, 2, 3)"
def __init__(self, pos_marker=None):
    """For the meta segment we override the init method.

    For something without content, the content doesn't make sense.
    The pos_marker, will be matched with the following segment, but
    meta segments are ignored during fixes so it's ok in this sense.
    We need the pos marker later for dealing with repairs.
    """
    # A meta segment never carries raw content.
    self._raw = ""
    # Strip any provided position marker so it's skipped and not
    # considered during fixing. With no marker given, fall back to a
    # fresh one — it will need realigning before it's useful.
    self.pos_marker = pos_marker.strip() if pos_marker else FilePositionMarker()
def lex(
    self, raw: Union[str, TemplatedFile]
) -> Tuple[Tuple[BaseSegment, ...], List[SQLLexError]]:
    """Take a string or TemplatedFile and return segments.

    If we fail to match the *whole* string, then we must have
    found something that we cannot lex. If that happens we should
    package it up as unlexable and keep track of the exceptions.
    """
    pos = FilePositionMarker()
    segments: Tuple[BaseSegment, ...] = ()
    errors: List[SQLLexError] = []
    # Work on the string form, even if given a TemplatedFile for now.
    remainder = str(raw)
    while True:
        match = self.matcher.match(remainder, pos)
        segments += match.segments
        if not match.new_string:
            # The whole remaining string matched: we're done.
            break
        # Something unlexable remains. Record the failure...
        errors.append(
            SQLLexError(
                "Unable to lex characters: '{0!r}...'".format(
                    match.new_string[:10]
                ),
                pos=match.new_pos,
            )
        )
        # ...then try the last-resort lexer to skip past it.
        fallback = self.last_resort_lexer.match(match.new_string, match.new_pos)
        if not fallback:
            # If we STILL can't match, then just panic out.
            raise errors[-1]
        remainder = fallback.new_string
        pos = fallback.new_pos
        segments += fallback.segments
    # Only a TemplatedFile carries the mapping needed for enrichment.
    if isinstance(raw, TemplatedFile):
        return self.enrich_segments(segments, raw), errors
    return segments, errors
def enrich_segments(
    segment_buff: Tuple[BaseSegment, ...], templated_file: TemplatedFile
) -> Tuple[BaseSegment, ...]:
    """Enrich the segments using the templated file.

    We use the mapping in the template to provide positions
    in the source file.
    """
    # Make a new buffer to hold the enriched segments.
    # We need a new buffer to hold the new meta segments
    # introduced.
    new_segment_buff = []
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info(
        "Enriching Segments. Source-only slices: %s", source_only_slices
    )
    for segment in segment_buff:
        # The span of the templated file covered by this segment.
        templated_slice = slice(
            segment.pos_marker.char_pos,
            segment.pos_marker.char_pos + len(segment.raw),
        )
        # Map that span back to a span in the source file.
        source_slice = templated_file.templated_slice_to_source_slice(
            templated_slice
        )
        # At this stage, templated slices will be INCLUDED in the source slice,
        # so we should consider whether we've captured any. If we have then
        # we need to re-evaluate whether it's a literal or not.
        # NOTE: source_only_slices is presumably ordered by source_idx —
        # the early `break` below relies on that; confirm upstream.
        for source_only_slice in source_only_slices:
            if source_only_slice.source_idx > source_slice.start:
                # Past this segment's start: nothing further can match.
                break
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    templated_slice.start,
                )
                # Adjust the source slice accordingly: skip over the
                # source-only section so the segment's slice starts after it.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )
                # Add segments as appropriate.
                # If it's a block end (or mid), add a dedent.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    new_segment_buff.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
                # Always add a placeholder
                new_segment_buff.append(
                    TemplateSegment(
                        pos_marker=segment.pos_marker,
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start (or mid), add an indent.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    new_segment_buff.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
        # Translate the (possibly adjusted) source start into line/pos.
        source_line, source_pos = templated_file.get_line_pos_of_char_pos(
            source_slice.start
        )
        # Recalculate is_literal
        is_literal = templated_file.is_source_slice_literal(source_slice)
        # Replace the plain marker with an enriched one carrying both
        # templated and source positions.
        segment.pos_marker = EnrichedFilePositionMarker(
            statement_index=segment.pos_marker.statement_index,
            line_no=segment.pos_marker.line_no,
            line_pos=segment.pos_marker.line_pos,
            char_pos=segment.pos_marker.char_pos,
            templated_slice=templated_slice,
            source_slice=source_slice,
            is_literal=is_literal,
            source_pos_marker=FilePositionMarker(
                segment.pos_marker.statement_index,
                source_line,
                source_pos,
                source_slice.start,
            ),
        )
        new_segment_buff.append(segment)
    lexer_logger.debug("Enriched Segments:")
    for seg in new_segment_buff:
        lexer_logger.debug(
            "\tTmp: %s\tSrc: %s\tSeg: %s",
            getattr(seg.pos_marker, "templated_slice", None),
            getattr(seg.pos_marker, "source_slice", None),
            seg,
        )
    return tuple(new_segment_buff)
def test__markers__common_marker_format():
    """Test formatting of markers."""
    marker = FilePositionMarker(1, 2, 3, 0)
    # A marker built from (1, 2, 3, 0) renders with the final argument
    # in brackets and the first three in parentheses.
    assert str(marker) == "[0](1, 2, 3)"