def __init__(self, *args, **kwargs) -> None:
    """Initializes new visitor and saves all docstrings."""
    super().__init__(*args, **kwargs)
    self._docstrings = get_docstring_tokens(self.file_tokens)
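# `get_docstring_tokens` is called throughout these snippets but never shown.
# Below is a minimal sketch of what such a helper could look like; it is an
# assumption, not this project's real implementation. It treats a STRING
# token as a docstring when it is the first statement of the module or of a
# `def`/`class` body. Known gaps: `async def` and one-line suites such as
# `def f(): "doc"` are not handled.
import tokenize


def get_docstring_tokens(tokens):
    """Returns the set of tokens that sit in docstring position."""
    docstrings = set()
    expect_docstring = True  # a module may open with a docstring
    suite_opener = None      # first token string of the current logical line
    line_start = True        # the next significant token starts a logical line
    for token in tokens:
        if token.type in (tokenize.ENCODING, tokenize.NL,
                          tokenize.COMMENT, tokenize.DEDENT):
            continue
        if token.type == tokenize.NEWLINE:
            line_start = True
            continue
        if token.type == tokenize.INDENT:
            # Only a suite opened by a `def`/`class` line gets a docstring slot
            expect_docstring = suite_opener in ('def', 'class')
            continue
        if line_start:
            suite_opener = token.string
            line_start = False
        if token.type == tokenize.STRING and expect_docstring:
            docstrings.add(token)
        expect_docstring = False
    return docstrings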
def get_quotes_errors(self, file_contents):
    tokens = [
        Token(t)
        for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))
    ]
    docstring_tokens = get_docstring_tokens(tokens)
    for token in tokens:
        if token.type != tokenize.STRING:
            # Ignore non-string tokens
            continue

        # Remove any prefixes in strings like `u` from `u"foo"`
        # DEV: `last_quote_char` is 1 character, even for multiline strings
        #   `"foo"`      -> `"foo"`
        #   `b"foo"`     -> `"foo"`
        #   `br"foo"`    -> `"foo"`
        #   `b"""foo"""` -> `"""foo"""`
        last_quote_char = token.string[-1]
        first_quote_index = token.string.index(last_quote_char)
        prefix = token.string[:first_quote_index].lower()
        unprefixed_string = token.string[first_quote_index:]

        # Determine if our string is multiline
        #   "foo"[0] * 3   = " * 3 = """
        #   "foo"[0:3]     = "fo
        #   """foo"""[0:3] = """
        is_docstring = token in docstring_tokens
        is_multiline_string = unprefixed_string[0] * 3 == unprefixed_string[0:3]
        start_row, start_col = token.start

        # If our string is a docstring
        # DEV: The docstring check must come before the multiline check,
        #   since a docstring can itself be a multiline string
        if is_docstring:
            if self.config['good_docstring'] in unprefixed_string:
                continue

            yield {
                'message': 'Q002 Remove bad quotes from docstring',
                'line': start_row,
                'col': start_col,
            }
        # Otherwise, if our string is multiline
        elif is_multiline_string:
            # If our string is or contains a known good string, then ignore it
            #   (""")foo""" -> good (continue)
            #   '''foo(""")''' -> good (continue)
            #   (''')foo''' -> possibly bad
            if self.config['good_multiline'] in unprefixed_string:
                continue

            # If our string ends with a known good ending, then ignore it
            #   '''foo("''') -> good (continue)
            #   The opposite, """foo"""", would break our parser
            #   (it cannot handle a """" ending)
            if unprefixed_string.endswith(self.config['good_multiline_ending']):
                continue

            # Output our error
            yield {
                'message': 'Q001 Remove bad quotes from multiline string',
                'line': start_row,
                'col': start_col,
            }
        # Otherwise (string is an inline quote)
        else:
            #   'This is a string'       -> Good
            #   'This is a "string"'     -> Good
            #   'This is a \"string\"'   -> Good
            #   'This is a \'string\''   -> Bad (Q003): escaped inner quotes
            #   '"This" is a \'string\'' -> Good: changing outer quotes would not avoid escaping
            #   "This is a string"       -> Bad (Q000)
            #   "This is a 'string'"     -> Good: avoids escaped inner quotes
            #   "This is a \"string\""   -> Bad (Q000)
            #   "\"This\" is a 'string'" -> Good
            string_contents = unprefixed_string[1:-1]

            # If the string is the preferred type, check for avoidable escapes
            if last_quote_char == self.config['good_single']:
                if not self.config['avoid_escape'] or 'r' in prefix:
                    continue

                if (self.config['good_single'] in string_contents and
                        self.config['bad_single'] not in string_contents):
                    yield {
                        'message': 'Q003 Change outer quotes to avoid escaping inner quotes',
                        'line': start_row,
                        'col': start_col,
                    }
                if len(string_contents) < 1:
                    yield {
                        'message': 'Q000 Remove bad quotes',
                        'line': start_row,
                        'col': start_col,
                    }
                continue

            # If not the preferred type, only allow its use to avoid escapes,
            # excusing single-quoted strings of at most one character
            if self.config['good_single'] not in string_contents:
                if not (len(string_contents) <= 1 and
                        re.match(r"^'(?:.|\n|\t)?'$", unprefixed_string)):
                    yield {
                        'message': 'Q000 Remove bad quotes',
                        'line': start_row,
                        'col': start_col,
                    }
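# The `Token` wrapper used above is also not shown in these snippets. A
# minimal stand-in (an assumption, not the project's real class) only needs
# the attributes the generator reads, and must be hashable so the
# `token in docstring_tokens` membership test works; default object identity
# is enough, since the set is built from the same wrapper instances.
import tokenize


class Token:
    """Minimal stand-in wrapping a `tokenize.TokenInfo`."""

    def __init__(self, t):
        self.type = t.type      # token type, e.g. tokenize.STRING
        self.string = t.string  # exact source text, including prefix and quotes
        self.start = t.start    # (row, col) where the token starts

    def __repr__(self):
        return 'Token(type={0}, string={1!r}, start={2})'.format(
            tokenize.tok_name[self.type], self.string, self.start)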
def get_quotes_errors(self, file_contents):
    tokens = [
        Token(t)
        for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))
    ]
    docstring_tokens = get_docstring_tokens(tokens)
    for token in tokens:
        if token.type != tokenize.STRING:
            # Ignore non-string tokens
            continue

        # Remove any prefixes in strings like `u` from `u"foo"`
        # DEV: `last_quote_char` is 1 character, even for multiline strings
        #   `"foo"`      -> `"foo"`
        #   `b"foo"`     -> `"foo"`
        #   `br"foo"`    -> `"foo"`
        #   `b"""foo"""` -> `"""foo"""`
        last_quote_char = token.string[-1]
        first_quote_index = token.string.index(last_quote_char)
        unprefixed_string = token.string[first_quote_index:]

        # Determine if our string is multiline
        #   "foo"[0] * 3   = " * 3 = """
        #   "foo"[0:3]     = "fo
        #   """foo"""[0:3] = """
        is_docstring = token in docstring_tokens
        is_multiline_string = unprefixed_string[0] * 3 == unprefixed_string[0:3]
        start_row, start_col = token.start

        # If our string is a docstring
        # DEV: The docstring check must come before the multiline check,
        #   since a docstring can itself be a multiline string
        if is_docstring:
            if self.config['good_docstring'] in unprefixed_string:
                continue

            yield {
                'message': 'Q002 Remove bad quotes from docstring',
                'line': start_row,
                'col': start_col,
            }
        # Otherwise, if our string is multiline
        elif is_multiline_string:
            # If our string is or contains a known good string, then ignore it
            #   (""")foo""" -> good (continue)
            #   '''foo(""")''' -> good (continue)
            #   (''')foo''' -> possibly bad
            if self.config['good_multiline'] in unprefixed_string:
                continue

            # Output our error
            yield {
                'message': 'Q001 Remove bad quotes from multiline string',
                'line': start_row,
                'col': start_col,
            }
        # Otherwise (string is an inline quote)
        else:
            # If our string is a known good string, then ignore it
            #   (')foo' -> good (continue)
            #   "it(')s" -> good (continue)
            #   (")foo" -> possibly bad
            if self.config['good_single'] in unprefixed_string:
                continue

            # Output our error
            yield {
                'message': 'Q000 Remove bad quotes',
                'line': start_row,
                'col': start_col,
            }
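# Hypothetical driver for the generators above. `QuoteChecker` and the exact
# config values are assumptions based only on the `self.config` lookups the
# code performs (the `good_multiline_ending` value is a guess consistent with
# the '''foo("''' example in the comments). Relies on `Token` and
# `get_docstring_tokens` as sketched earlier.
import io


class QuoteChecker:
    get_quotes_errors = get_quotes_errors  # bind the generator above as a method

    def __init__(self, config):
        self.config = config


checker = QuoteChecker({
    'good_single': "'",                    # prefer single-quoted inline strings
    'bad_single': '"',
    'good_multiline': '"""',               # prefer triple-double multiline strings
    'good_multiline_ending': '"' + "'''",  # excused ending, as in '''foo("'''
    'good_docstring': '"""',
    'avoid_escape': True,
})
source = 'x = "double quoted"\ny = \'single quoted\'\n'
for error in checker.get_quotes_errors(io.StringIO(source).readlines()):
    print('{line}:{col} {message}'.format(**error))
# -> 1:4 Q000 Remove bad quotes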
def __init__(self, *args, **kwargs) -> None:
    """Creates line tracking for tokens."""
    super().__init__(*args, **kwargs)
    self._lines: TokenLines = defaultdict(list)
    self._docstrings = get_docstring_tokens(self.file_tokens)
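# Sketch of how `_lines` might be filled in: `TokenLines` is assumed to be a
# `DefaultDict[int, List[tokenize.TokenInfo]]` mapping a physical line number
# to the tokens that start on it, and `visit` is a hypothetical per-token
# hook; the real visitor API may differ.
import tokenize
from collections import defaultdict
from typing import DefaultDict, List

TokenLines = DefaultDict[int, List[tokenize.TokenInfo]]


def visit(self, token: tokenize.TokenInfo) -> None:
    """Groups each token under the row where it starts."""
    self._lines[token.start[0]].append(token)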
def __init__(
        self,
        logical_line: str,
        previous_logical: str,
        tokens: List[tokenize.TokenInfo],
) -> None:
    """Saves the logical line and its tokens, and detects docstring tokens."""
    self.line = logical_line
    self.tokens = tokens
    prev_tokens = tokenize.tokenize(
        lambda L=iter([previous_logical.encode('utf-8')]): next(L))
    self.docstring_tokens = get_docstring_tokens(prev_tokens, self.tokens)
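# The `lambda L=iter(...): next(L)` trick above adapts a sequence of lines to
# the zero-argument `readline` callable that `tokenize.tokenize` expects.
# `next` raises StopIteration once the lines are exhausted, which the
# tokenizer treats as end of input. A small self-contained demo:
import tokenize

lines = [b'def foo():\n', b'    """Docstring."""\n']
for tok in tokenize.tokenize(lambda L=iter(lines): next(L)):
    print(tokenize.tok_name[tok.type], repr(tok.string))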