class UnnecessaryFormatString(VisitorBasedCodemodCommand):
    """Codemod that rewrites placeholder-free f-strings as plain strings."""

    DESCRIPTION: str = "Converts f-strings which perform no formatting to regular strings."

    @m.leave(m.FormattedString(parts=(m.FormattedStringText(), )))
    def _check_formatted_string(
        self,
        _original_node: libcst.FormattedString,
        updated_node: libcst.FormattedString,
    ) -> libcst.BaseExpression:
        """Replace an f-string made of a single text part with a plain string.

        The node is returned untouched when the text contains an escaped
        brace (``{{`` or ``}}``), or when the f-string and its would-be
        replacement do not evaluate to the same value (a warning is emitted
        in that case).
        """
        text = libcst.ensure_type(
            updated_node.parts[0], libcst.FormattedStringText
        ).value

        # Doubled braces are the only escapes whose meaning changes once the
        # "f" prefix is gone, so such strings are simply left alone.
        if any(escaped_brace in text for escaped_brace in ("{{", "}}")):
            return updated_node

        # Same opening token with every "f"/"F" character removed.
        plain_start = "".join(ch for ch in updated_node.start if ch not in "fF")
        fstring_source = updated_node.start + text + updated_node.end
        plain_source = plain_start + text + updated_node.end

        # NOTE(review): eval() is applied to literal source text taken from
        # the file being codemodded. The matcher only admits a single text
        # part, so there are no replacement fields to execute, but this is
        # still eval() on file contents.
        fstring_value = eval(fstring_source)  # noqa
        plain_value = eval(plain_source)  # noqa

        if fstring_value == plain_value:
            return libcst.SimpleString(plain_source)

        self.warn(
            f"Attempted to codemod |{fstring_source}| to "
            f"|{plain_source}| but don't eval to the same! First is |{fstring_value}| and "
            f"second is |{plain_value}|"
        )
        return updated_node
def visit_FormattedString(self, node: cst.FormattedString) -> None:
    """Report an f-string that consists of a single text part.

    The suggested replacement is a plain string: the ``f``/``F`` characters
    are removed from the opening token and doubled braces in the text are
    collapsed to single braces.
    """
    single_text_part = m.FormattedString(parts=(m.FormattedStringText(),))
    if m.matches(node, single_text_part):
        text_part = cst.ensure_type(node.parts[0], cst.FormattedStringText)
        # str.replace leaves the text unchanged when no doubled brace is
        # present, so no membership test is needed first.
        unescaped_text = text_part.value.replace("{{", "{").replace("}}", "}")
        plain_start = node.start.replace("f", "").replace("F", "")
        replacement = cst.SimpleString(plain_start + unescaped_text + node.end)
        self.report(node, replacement=replacement)
def _line_ranges_spanned_by_format_strings(
    source: str,
) -> Dict[libcst.CSTNode, LineRange]:
    """Map every f-string node found in ``source`` to its line range.

    Each value is a ``(start_line, end_line)`` pair read from libcst's
    position metadata. An empty mapping is returned (and a warning logged)
    when libcst cannot parse ``source``.
    """
    try:
        parsed_module = libcst.parse_module(source)
        wrapper = libcst.metadata.MetadataWrapper(parsed_module)
    except libcst._exceptions.ParserSyntaxError as parse_error:
        # NOTE: This should not happen. A file libcst cannot parse would
        # probably have been unparseable for Pyre too, in which case we
        # would not have gotten this far. Still, catch the exception and
        # skip the special handling of format strings.
        LOG.warning(
            "Not moving out fixmes from f-strings because"
            f" libcst failed to parse the file: {parse_error}"
        )
        return {}

    positions = wrapper.resolve(libcst.metadata.PositionProvider)
    format_strings = libcst_matchers.findall(
        wrapper.module, libcst_matchers.FormattedString()
    )

    line_ranges: Dict[libcst.CSTNode, LineRange] = {}
    for format_string in format_strings:
        code_range = positions[format_string]
        line_ranges[format_string] = (code_range.start.line, code_range.end.line)
    return line_ranges
def leave_Call(  # noqa: C901
    self, original_node: cst.Call, updated_node: cst.Call
) -> cst.BaseExpression:
    """Convert a ``"...".format(...)`` call on a string literal to an f-string.

    Returns the equivalent ``cst.FormattedString`` when every replacement
    field can be converted. Returns ``updated_node`` unchanged (after
    emitting a warning where applicable) when the call is not a
    ``.format()`` on a simple string literal or uses something that is not
    supported here: a string prefix other than raw, a non-empty format
    spec, an unresolvable field name, or an argument expression containing
    a comment, a nested f-string, ``await``, a backslash, or a string that
    uses the same quote as the containing string.
    """
    # Lets figure out if this is a "".format() call
    extraction = self.extract(
        updated_node,
        m.Call(
            func=m.Attribute(
                value=m.SaveMatchedNode(m.SimpleString(), "string"),
                attr=m.Name("format"),
            )
        ),
    )
    if extraction is None:
        return updated_node

    fstring: List[cst.BaseFormattedStringContent] = []
    inserted_sequence: int = 0
    stringnode = cst.ensure_type(extraction["string"], cst.SimpleString)

    # The result is built as "f" + prefix + quote. Python only allows "f"
    # to be combined with the raw prefix ("fr"/"rf"); "fu" and "fb" are not
    # valid string prefixes, so such strings cannot be converted.
    if stringnode.prefix.lower() not in ("", "r"):
        self.warn(
            f"Unsupported string prefix {stringnode.prefix} in format() call"
        )
        return updated_node

    tokens = _get_tokens(stringnode.raw_value)
    for literal_text, field_name, format_spec, conversion in tokens:
        if literal_text:
            fstring.append(cst.FormattedStringText(literal_text))
        if field_name is None:
            # This is not a format-specification
            continue
        if format_spec:
            # TODO: This is supportable since format specs are compatible
            # with f-string format specs, but it would require matching
            # format specifier expansions.
            self.warn(f"Unsupported format_spec {format_spec} in format() call")
            return updated_node

        # Auto-insert field sequence if it is empty
        if field_name == "":
            field_name = str(inserted_sequence)
            inserted_sequence += 1
        expr = _find_expr_from_field_name(field_name, updated_node.args)
        if expr is None:
            # Most likely they used * expansion in a format.
            self.warn(f"Unsupported field_name {field_name} in format() call")
            return updated_node

        # Verify that we don't have any comments or newlines. Comments aren't
        # allowed in f-strings, and newlines need parenthesization. We can
        # have formattedstrings inside other formattedstrings, but I chose not
        # to deal with that for now.
        if self.findall(expr, m.Comment()):
            # We could strip comments, but this is a formatting change so
            # we choose not to for now.
            self.warn("Unsupported comment in format() call")
            return updated_node
        if self.findall(expr, m.FormattedString()):
            self.warn("Unsupported f-string in format() call")
            return updated_node
        if self.findall(expr, m.Await()):
            # This is fixed in 3.7 but we don't currently have a flag
            # to enable/disable it.
            self.warn("Unsupported await in format() call")
            return updated_node

        # Stripping newlines is effectively a format-only change.
        expr = cst.ensure_type(
            expr.visit(StripNewlinesTransformer(self.context)),
            cst.BaseExpression,
        )

        # Try our best to swap quotes on any strings that won't fit
        expr = cst.ensure_type(
            expr.visit(
                SwitchStringQuotesTransformer(self.context, stringnode.quote[0])
            ),
            cst.BaseExpression,
        )

        # Verify that the resulting expression doesn't have a backslash
        # in it.
        raw_expr_string = self.module.code_for_node(expr)
        if "\\" in raw_expr_string:
            self.warn("Unsupported backslash in format expression")
            return updated_node

        # For safety sake, if this is a dict/set or dict/set comprehension,
        # wrap it in parens so that it doesn't accidentally create an
        # escape.
        if (
            raw_expr_string.startswith("{") or raw_expr_string.endswith("}")
        ) and (not expr.lpar or not expr.rpar):
            expr = expr.with_changes(
                lpar=[cst.LeftParen()], rpar=[cst.RightParen()]
            )

        # Verify that any strings we insert don't have the same quote
        quote_gatherer = StringQuoteGatherer(self.context)
        expr.visit(quote_gatherer)
        for stringend in quote_gatherer.stringends:
            if stringend in stringnode.quote:
                self.warn("Cannot embed string with same quote from format() call")
                return updated_node

        fstring.append(
            cst.FormattedStringExpression(expression=expr, conversion=conversion)
        )

    # Every part converted cleanly, so assemble the f-string itself.
    return cst.FormattedString(
        parts=fstring,
        start=f"f{stringnode.prefix}{stringnode.quote}",
        end=stringnode.quote,
    )
def obf_universal(self, node: cst.CSTNode, *types):
    """Return ``node`` with the names it contains obfuscated.

    Dispatches on the node's kind: a ``Name`` is renamed directly (when
    ``self.can_rename`` allows it), container and operator nodes are rebuilt
    with their children passed back through this method, and any node kind
    not listed below is returned unchanged.

    Args:
        node: The CST node to process.
        *types: Forwarded to ``self.can_rename`` when ``node`` is a ``Name``.
            When empty, ``('a', 'ca', 'v', 'cv')`` is used. Recursive calls
            made here never pass ``types``, so nested names always get that
            default.

    Returns:
        The (possibly rebuilt) node.
    """
    if m.matches(node, m.Name()):
        types = types if types else ('a', 'ca', 'v', 'cv')
        node = cst.ensure_type(node, cst.Name)
        if self.can_rename(node.value, *types):
            node = self.get_new_cst_name(node)

    elif m.matches(node, m.NameItem()):
        node = cst.ensure_type(node, cst.NameItem)
        node = node.with_changes(name=self.obf_universal(node.name))

    elif m.matches(node, m.Call()):
        node = cst.ensure_type(node, cst.Call)
        if self.change_methods or self.change_functions:
            node = self.new_obf_function_name(node)
        if self.change_arguments or self.change_method_arguments:
            node = self.obf_function_args(node)

    elif m.matches(node, m.Attribute()):
        node = cst.ensure_type(node, cst.Attribute)
        # NOTE(review): the results of these two calls are discarded, so the
        # attribute node itself is returned unchanged. Kept as-is; confirm
        # whether that is intentional.
        self.obf_universal(node.value)
        self.obf_universal(node.attr)

    elif m.matches(node, m.AssignTarget()):
        node = cst.ensure_type(node, cst.AssignTarget)
        node = node.with_changes(target=self.obf_universal(node.target))

    elif m.matches(node, m.List() | m.Tuple()):
        if m.matches(node, m.List()):
            node = cst.ensure_type(node, cst.List)
        else:
            node = cst.ensure_type(node, cst.Tuple)
        node = node.with_changes(
            elements=[self.obf_universal(el) for el in node.elements])

    elif m.matches(node, m.Subscript()):
        node = cst.ensure_type(node, cst.Subscript)
        new_slice = [
            el.with_changes(slice=self.obf_slice(el.slice))
            for el in node.slice
        ]
        node = node.with_changes(slice=new_slice)
        node = node.with_changes(value=self.obf_universal(node.value))

    elif m.matches(node, m.Element()):
        node = cst.ensure_type(node, cst.Element)
        node = node.with_changes(value=self.obf_universal(node.value))

    elif m.matches(node, m.Dict()):
        node = cst.ensure_type(node, cst.Dict)
        node = node.with_changes(
            elements=[self.obf_universal(el) for el in node.elements])

    elif m.matches(node, m.DictElement()):
        node = cst.ensure_type(node, cst.DictElement)
        new_key = self.obf_universal(node.key)
        new_val = self.obf_universal(node.value)
        node = node.with_changes(key=new_key, value=new_val)

    elif m.matches(node, m.StarredDictElement()):
        node = cst.ensure_type(node, cst.StarredDictElement)
        node = node.with_changes(value=self.obf_universal(node.value))

    elif m.matches(node, m.If() | m.While()):
        # Narrow to the statement type that actually matched. The check must
        # use the m.If() matcher and the cst.If node class: a union of node
        # classes is not a matcher, and an If statement is not an IfExp.
        if m.matches(node, m.If()):
            node = cst.ensure_type(node, cst.If)
        else:
            node = cst.ensure_type(node, cst.While)
        node = node.with_changes(test=self.obf_universal(node.test))

    elif m.matches(node, m.IfExp()):
        node = cst.ensure_type(node, cst.IfExp)
        node = node.with_changes(body=self.obf_universal(node.body))
        node = node.with_changes(test=self.obf_universal(node.test))
        node = node.with_changes(orelse=self.obf_universal(node.orelse))

    elif m.matches(node, m.Comparison()):
        node = cst.ensure_type(node, cst.Comparison)
        # The comparison targets are processed before the left operand.
        new_compars = [
            self.obf_universal(target) for target in node.comparisons
        ]
        node = node.with_changes(left=self.obf_universal(node.left))
        node = node.with_changes(comparisons=new_compars)

    elif m.matches(node, m.ComparisonTarget()):
        node = cst.ensure_type(node, cst.ComparisonTarget)
        node = node.with_changes(
            comparator=self.obf_universal(node.comparator))

    elif m.matches(node, m.FormattedString()):
        node = cst.ensure_type(node, cst.FormattedString)
        node = node.with_changes(
            parts=[self.obf_universal(part) for part in node.parts])

    elif m.matches(node, m.FormattedStringExpression()):
        node = cst.ensure_type(node, cst.FormattedStringExpression)
        node = node.with_changes(
            expression=self.obf_universal(node.expression))

    elif m.matches(node, m.BinaryOperation() | m.BooleanOperation()):
        if m.matches(node, m.BinaryOperation()):
            node = cst.ensure_type(node, cst.BinaryOperation)
        else:
            node = cst.ensure_type(node, cst.BooleanOperation)
        node = node.with_changes(left=self.obf_universal(node.left),
                                 right=self.obf_universal(node.right))

    elif m.matches(node, m.UnaryOperation()):
        node = cst.ensure_type(node, cst.UnaryOperation)
        node = node.with_changes(
            expression=self.obf_universal(node.expression))

    elif m.matches(node, m.ListComp()):
        node = cst.ensure_type(node, cst.ListComp)
        node = node.with_changes(elt=self.obf_universal(node.elt))
        node = node.with_changes(for_in=self.obf_universal(node.for_in))

    elif m.matches(node, m.DictComp()):
        node = cst.ensure_type(node, cst.DictComp)
        node = node.with_changes(key=self.obf_universal(node.key))
        node = node.with_changes(value=self.obf_universal(node.value))
        node = node.with_changes(for_in=self.obf_universal(node.for_in))

    elif m.matches(node, m.CompFor()):
        node = cst.ensure_type(node, cst.CompFor)
        node = node.with_changes(target=self.obf_universal(node.target))
        node = node.with_changes(iter=self.obf_universal(node.iter))
        node = node.with_changes(
            ifs=[self.obf_universal(el) for el in node.ifs])

    elif m.matches(node, m.CompIf()):
        node = cst.ensure_type(node, cst.CompIf)
        node = node.with_changes(test=self.obf_universal(node.test))

    # Every other node kind (including Integer, Float and SimpleString
    # literals) is returned unchanged.
    return node
def _convert_token_to_fstring_expression(
    self,
    field_name: str,
    conversion: Optional[str],
    arguments: Sequence[cst.Arg],
    containing_string: cst.SimpleString,
) -> Optional[cst.FormattedStringExpression]:
    """Build the f-string expression part for one ``format()`` field.

    Args:
        field_name: The replacement field name, resolved against
            ``arguments`` via ``_find_expr_from_field_name``.
        conversion: Conversion passed through to the resulting
            ``FormattedStringExpression``.
        arguments: The arguments of the ``format()`` call.
        containing_string: The string literal ``format()`` was called on;
            only its quote is consulted here.

    Returns:
        The new ``FormattedStringExpression``, or ``None`` (after a warning)
        when the field cannot be resolved or its expression contains a
        comment (unless ``self.allow_strip_comments``), a nested f-string,
        an ``await`` (unless ``self.allow_await``), a backslash, or a string
        that uses the containing string's quote.
    """
    argument_expr = _find_expr_from_field_name(field_name, arguments)
    if argument_expr is None:
        # Most likely they used * expansion in a format.
        self.warn(f"Unsupported field_name {field_name} in format() call")
        return None

    # Comments are not allowed inside f-strings and newlines need
    # parenthesization. Nested f-strings could be supported, but are
    # deliberately rejected for now.
    if self.findall(argument_expr, m.Comment()) and not self.allow_strip_comments:
        self.warn("Unsupported comment in format() call")
        return None
    if self.findall(argument_expr, m.FormattedString()):
        self.warn("Unsupported f-string in format() call")
        return None
    if self.findall(argument_expr, m.Await()) and not self.allow_await:
        self.warn("Unsupported await in format() call")
        return None

    # Stripping newlines is effectively a format-only change.
    without_newlines = argument_expr.visit(StripNewlinesTransformer(self.context))
    argument_expr = cst.ensure_type(without_newlines, cst.BaseExpression)

    # Try our best to swap quotes on any strings that won't fit.
    quote_switcher = SwitchStringQuotesTransformer(
        self.context, containing_string.quote[0]
    )
    argument_expr = cst.ensure_type(
        argument_expr.visit(quote_switcher), cst.BaseExpression
    )

    # The rendered expression must not contain a backslash.
    rendered = self.module.code_for_node(argument_expr)
    if "\\" in rendered:
        self.warn("Unsupported backslash in format expression")
        return None

    # For safety, a dict/set or dict/set comprehension is wrapped in parens
    # so that its braces don't accidentally create an escape.
    touches_brace = rendered.startswith("{") or rendered.endswith("}")
    fully_parenthesized = argument_expr.lpar and argument_expr.rpar
    if touches_brace and not fully_parenthesized:
        argument_expr = argument_expr.with_changes(
            lpar=[cst.LeftParen()], rpar=[cst.RightParen()]
        )

    # Any string we insert must not use the containing string's quote.
    quote_gatherer = StringQuoteGatherer(self.context)
    argument_expr.visit(quote_gatherer)
    if any(
        stringend in containing_string.quote
        for stringend in quote_gatherer.stringends
    ):
        self.warn("Cannot embed string with same quote from format() call")
        return None

    return cst.FormattedStringExpression(
        expression=argument_expr, conversion=conversion
    )