def compactIf(node, thenPart, condition):
    """
    Rewrites an if statement without elsePart into a shorter expression statement.

    "if(something)make()" is translated to "something&&make()" which is two
    characters shorter. A condition of type "not" is unwrapped and combined
    with an "or" operator instead. This only works when the thenPart is an
    expression statement and does not contain other statements.

    - node: the if node to rewrite
    - thenPart: the then branch; its "expression" (if any) is reused
    - condition: the condition node of the if statement
    """

    expression = getattr(thenPart, "expression", None)

    if not expression:
        # Nothing to execute in the then branch: keep only the condition and
        # turn the if node itself into a plain semicolon statement
        node.remove(condition)
        node.remove(node.thenPart)
        node.append(condition, "expression")
        node.type = "semicolon"
        return

    # "if(!a)b" => "a||b", everything else => "a&&b"
    if condition.type == "not":
        operator = "or"
        condition = condition[0]
    else:
        operator = "and"

    combined = Node.Node(thenPart.tokenizer, operator)
    combined.append(condition)
    combined.append(expression)

    # The then statement is reused as the carrier of the new expression
    thenPart.append(combined, "expression")

    fixParens(expression)
    fixParens(condition)

    node.parent.replace(node, thenPart)
def __createSimpleAssignment(identifier, valueNode):
    """
    Builds an "assign" node with two children: a new "identifier" node whose
    value is the given identifier, followed by the given value node.
    """

    target = Node.Node(None, "identifier")
    target.value = identifier

    assignment = Node.Node(None, "assign")
    assignment.append(target)
    assignment.append(valueNode)

    return assignment
def __rebuildAsAssignment(node, firstVarStatement):
    """
    Turns the items of a var statement into a list of assignments and moves
    the declarations themselves over to the given var statement.

    - node: the var statement to dissolve
    - firstVarStatement: var statement which receives all declarations
    """

    tokenizer = node.tokenizer
    statement = Node.Node(tokenizer, "semicolon")
    assignments = Node.Node(tokenizer, "comma")
    statement.append(assignments, "expression")

    # Iterate over a copy as the children are moved/removed while looping
    for item in list(node):
        if not hasattr(item, "name"):
            # JS 1.7 destructuring: declare every listed name separately
            for identifier in item.names:
                firstVarStatement.append(__createDeclaration(identifier.value))

            if hasattr(item, "initializer"):
                assignments.append(__createMultiAssignment(item.names, item.initializer))

            node.remove(item)
            continue

        # Plain declaration: split off the initializer, then move the declaration
        if hasattr(item, "initializer"):
            assignments.append(__createSimpleAssignment(item.name, item.initializer))

        firstVarStatement.append(item)

    if len(assignments) > 0:
        # Put the assignment statement where the declaration used to be
        node.parent.replace(node, statement)

    elif getattr(node, "rel", None) == "iterator":
        # "for-in" loops: no assignments were collected for these, so only
        # the iterated name(s) of the last processed item are kept
        if hasattr(item, "name"):
            replacement = __createIdentifier(item.name)
        else:
            # JS 1.7 destructuring
            replacement = item.names

        node.parent.replace(node, replacement)

    else:
        # Edge case: nothing left to keep, drop the statement completely
        if hasattr(node, "rel"):
            Console.warn("Remove related node (%s) from parent: %s" % (node.rel, node))

        node.parent.remove(node)

    # A comma list with exactly one entry is replaced by that entry
    if len(assignments) == 1:
        statement.replace(assignments, assignments[0])
def __splitTemplate(value, valueParams):
    """
    Split a template string into nested plus-expression(s).

    - value: template text containing the positional placeholders
    - valueParams: list of param nodes to inject for the placeholders

    Returns None when splitting the text with `__replacer` yields a single
    piece, otherwise the outermost "plus" node. Raises UserError when a
    placeholder refers to a position that has no matching param.
    """

    # Convert list with nodes into Python dict
    # [a, b, c] => {0:a, 1:b, 2:c}
    # A dict (instead of indexing the list) makes every position outside of
    # 0..len-1, including negative ones, fail with a KeyError below.
    mapper = dict(enumerate(valueParams))

    splits = __replacer.split(value)
    if len(splits) == 1:
        return None

    pair = Node.Node(None, "plus")

    for entry in splits:
        if entry == "":
            continue

        # Once the current node holds two operands, it becomes the first
        # operand of a fresh plus node
        if len(pair) == 2:
            newPair = Node.Node(None, "plus")
            newPair.append(pair)
            pair = newPair

        if __replacer.match(entry):
            # The character after the marker is the position, shifted by one
            # to get the index into the params
            pos = int(entry[1]) - 1

            try:
                repl = mapper[pos]
            except KeyError:
                raise UserError("Invalid positional value: %s in %s" % (entry, value))

            # Items might be added multiple times. Copy to protect original.
            copied = copy.deepcopy(repl)
            if copied.type not in ("identifier", "call"):
                copied.parenthesized = True

            pair.append(copied)

        else:
            child = Node.Node(None, "string")
            child.value = entry
            pair.append(child)

    return pair
def createHook(condition, thenPart, elsePart):
    """
    Builds a "hook" node and attaches the given nodes to it under the
    relations "condition", "thenPart" and "elsePart" (in this order).
    """

    hook = Node.Node(condition.tokenizer, "hook")

    relations = (
        (condition, "condition"),
        (thenPart, "thenPart"),
        (elsePart, "elsePart"),
    )
    for part, relation in relations:
        hook.append(part, relation)

    return hook
def combineToCommaExpression(node):
    """
    This method tries to combine a block with multiple statements into
    one semicolon statement with a comma expression containing all expressions
    from the previous block. This only works when the block exclusively consists
    of expressions as this do not work with other statements. Still this conversion
    reduces the size impact of many blocks and leads to the removal of a lot of
    curly braces in the result code.

    Example: {x++;y+=3} => x++,y+=3

    Returns the given node unchanged when it is None, not a block, contains
    anything other than semicolon statements, or contains exactly one of
    them. Otherwise the block is replaced inside its parent and the new
    semicolon statement is returned.
    """

    if node is None or node.type != "block":
        return node

    # Only blocks which exclusively consist of semicolon statements qualify
    counter = 0
    for child in node:
        if child is None:
            continue

        if child.type != "semicolon":
            return node

        counter += 1

    # Nothing to combine for a single statement
    if counter == 1:
        return node

    comma = Node.Node(node.tokenizer, "comma")

    for child in list(node):
        # Ignore missing children and empty semicolons
        if child is not None and hasattr(child, "expression"):
            comma.append(child.expression)

    semicolon = Node.Node(node.tokenizer, "semicolon")
    semicolon.append(comma, "expression")

    node.parent.replace(node, semicolon)

    return semicolon
def combineExpressions(condition, thenExpression, elseExpression):
    """
    Wraps a hook built from the condition and both expressions into a new
    semicolon statement, fixes the parens of all three parts and returns the
    statement.
    """

    hook = createHook(condition, thenExpression, elseExpression)

    statement = Node.Node(condition.tokenizer, "semicolon")
    statement.append(hook, "expression")

    for part in (condition, thenExpression, elseExpression):
        fixParens(part)

    return statement
def reworkElse(node, elsePart):
    """
    Moves the content of the else part directly behind the if statement.

    When the then part of an if ends with a return/throw, the else keyword is
    not needed and the else content can live in the same parent as the if.

    - node: the if statement
    - elsePart: the else part to inline behind the statement
    """

    containerTypes = ("block", "script")

    target = node.parent
    targetIndex = target.index(node) + 1

    # The if statement does not live in a container where statements can be
    # added next to it (e.g. a compact if-else where it is itself an else
    # part). Wrap it into a new block and use that block as the target.
    if target.type not in containerTypes:
        wrapper = Node.Node(None, "block")

        node.parent.replace(node, wrapper)
        wrapper.append(node)

        target = wrapper
        targetIndex = 1

    if target.type not in containerTypes:
        # print("No possible target found/created")
        return elsePart

    if elsePart.type != "block":
        target.insert(targetIndex, elsePart)
        return

    # Inserting back to front at a fixed index keeps the original order
    for child in reversed(elsePart):
        target.insert(targetIndex, child)

    # Remove else block from if statement
    node.remove(elsePart)

    return
def __combineVarStatements(node):
    """
    Top level method called to optimize a script node.

    Collects the declarations of the node in one var statement and, where
    possible, moves that statement into a directly following for loop.
    """

    # Nothing declared in this scope => nothing to combine
    if len(node.scope.declared) == 0:
        return

    firstVar = __findFirstVarStatement(node)

    # No var statement found to hold the declarations: create a new one at
    # the very beginning of the node
    if not firstVar:
        firstVar = Node.Node(None, "var")
        node.insert(0, firstVar)

    __patchVarStatements(node, firstVar)
    __cleanFirst(firstVar)

    # Remove unused "var"
    if len(firstVar) == 0:
        firstVar.parent.remove(firstVar)
        return

    # When a for loop without setup expression directly follows the var
    # statement, the var statement is moved into the loop as its setup
    container = firstVar.parent
    followerPos = container.index(firstVar) + 1
    if len(container) <= followerPos:
        return

    follower = container[followerPos]
    if follower.type == "for" and not hasattr(follower, "setup"):
        follower.append(firstVar, "setup")
def __recurser(node, unused):
    """
    Processes exactly one scope and removes the parameters, function names,
    function declarations and variable declarations which are listed as unused.

    - node: node to process (children of function nodes are not visited)
    - unused: collection of names which are unused in the scope

    Returns True when something was changed, otherwise False.
    """

    changed = False

    # Process children, but stay out of nested functions
    if node.type != "function":
        for child in node:
            # None children are allowed sometimes e.g. during array_init like [1,2,,,7,8]
            if child is not None and __recurser(child, unused):
                changed = True

    if node.type == "script" and hasattr(node, "parent"):
        owner = node.parent

        # Remove unused parameters
        params = getattr(owner, "params", None)
        if params:
            # Walk from the back and stop at the first used parameter: a
            # parameter can only go when no required one follows it
            for identifier in reversed(params):
                if identifier.value not in unused:
                    break

                Console.debug("Removing unused parameter '%s' in line %s", identifier.value, identifier.line)
                params.remove(identifier)
                changed = True

        # Remove function names which are unused
        if owner.functionForm == "expressed_form":
            funcName = getattr(owner, "name", None)
            if funcName is not None and funcName in unused:
                Console.debug("Removing unused function name at line %s" % node.line)
                del owner.name
                changed = True

    elif node.type == "function":
        # Remove full unused functions (when not in top-level scope)
        if node.functionForm == "declared_form" and getattr(node, "parent", None) and node.parent.type != "call":
            funcName = getattr(node, "name", None)
            if funcName is not None and funcName in unused:
                Console.debug("Removing unused function declaration %s at line %s" % (funcName, node.line))
                node.parent.remove(node)
                changed = True

    elif node.type == "var":
        primitiveTypes = ("null", "this", "true", "false", "identifier", "number", "string", "regexp")

        def toStatement(init):
            """Moves the initializer into a new semicolon statement"""
            statement = Node.Node(init.tokenizer, "semicolon")
            statement.append(init, "expression")

            # Protect non-expressions with parens
            if init.type in ("array_init", "object_init"):
                init.parenthesized = True
            elif init.type == "call" and init[0].type == "function":
                init[0].parenthesized = True

            return statement

        for decl in reversed(node):
            if getattr(decl, "name", None) not in unused:
                continue

            # Without initializer the declaration can go right away
            if not hasattr(decl, "initializer"):
                node.remove(decl)
                changed = True
                continue

            init = decl.initializer

            if init.type in primitiveTypes:
                Console.debug("Removing unused primitive variable %s at line %s" % (decl.name, decl.line))
                node.remove(decl)
                changed = True

            elif init.type == "function" and (not hasattr(init, "name") or init.name in unused):
                Console.debug("Removing unused function variable %s at line %s" % (decl.name, decl.line))
                node.remove(decl)
                changed = True

            # The only declaration left: the whole var statement is replaced
            # with a statement which just holds the initializer
            elif len(node) == 1:
                node.parent.replace(node, toStatement(init))
                changed = True

            # First or last declaration: move the initializer out of the var
            # statement and place it directly before respectively after it
            elif node[-1] == decl or node[0] == decl:
                isFirst = node[0] == decl

                node.remove(decl)
                nodePos = node.parent.index(node)
                statement = toStatement(init)

                if isFirst:
                    node.parent.insert(nodePos, statement)
                else:
                    node.parent.insert(nodePos + 1, statement)

                changed = True

            else:
                Console.debug("Could not automatically remove unused variable %s at line %s without possible side-effects" % (decl.name, decl.line))

        if len(node) == 0:
            Console.debug("Removing empty 'var' block at line %s" % node.line)
            node.parent.remove(node)

    return changed
def __recurser(node, table):
    """
    Walks through the given node (children first) and patches the calls to
    the translation functions with the data from the translation table.

    - node: node to process
    - table: translation table, looked up with the keys "msg",
      "msg[C:context]" and "singular[N:plural]"

    Returns the number of rewritten tr()/trc()/trn() calls in the subtree.
    tr() and trc() calls count even when there is no entry in the table,
    trn() calls only count when an entry exists.
    """

    counter = 0

    # Process children
    for child in list(node):
        if child is not None:
            counter += __recurser(child, table)

    # Process all method calls
    if node.type == "call":
        funcNameNode = None

        # Uses global translation method (not typical)
        if node[0].type == "identifier":
            funcNameNode = node[0]

        # Uses namespaced translation method.
        # Typically core.locale.Translation.tr() or Translation.tr()
        elif node[0].type == "dot" and node[0][1].type == "identifier":
            funcNameNode = node[0][1]

        # Gettext methods only at the moment
        funcName = funcNameNode and funcNameNode.value
        if funcName in translationFunctions:
            Console.debug("Found translation method %s in %s", funcName, node.line)
            Console.indent()

            params = node[1]

            # Remove marktr() calls
            if funcName == "marktr":
                node.parent.remove(node)

            # Verify param types
            # Compared by value: an identity check against a string literal
            # only works by accident when both strings happen to be interned.
            elif params[0].type != "string":
                # maybe something marktr() relevant being used, in this case we need to keep the call and inline the data
                pass

            # Error handling
            elif (funcName == "trn" or funcName == "trc") and params[1].type != "string":
                Console.warn("Expecting translation string to be type string: %s at line %s" % (params[1].type, params[1].line))

            # Signature tr(msg, arg1, ...)
            elif funcName == "tr":
                key = params[0].value
                if key in table:
                    params[0].value = table[key]

                counter += 1

                if len(params) == 1:
                    node.parent.replace(node, params[0])
                else:
                    replacement = __splitTemplate(params[0].value, params[1:])
                    if replacement:
                        node.parent.replace(node, replacement)

            # Signature trc(context, msg, arg1, ...)
            elif funcName == "trc":
                key = "%s[C:%s]" % (params[1].value, params[0].value)
                if key in table:
                    params[1].value = table[key]

                counter += 1

                if len(params) == 2:
                    node.parent.replace(node, params[1])
                else:
                    replacement = __splitTemplate(params[1].value, params[2:])
                    if replacement:
                        node.parent.replace(node, replacement)

            # Signature trn(msgSingular, msgPlural, int, arg1, ...)
            elif funcName == "trn":
                key = "%s[N:%s]" % (params[0].value, params[1].value)
                if key not in table:
                    # Keep the call untouched, but balance the indent() from above
                    Console.outdent()
                    return counter

                counter += 1

                # Use optimized trnc() method instead of trn()
                funcNameNode.value = "trnc"

                # Remove first two string parameters
                params.remove(params[0])
                params.remove(params[0])

                # Inject new object into params
                container = Node.Node(None, "object_init")
                params.insert(0, container)

                # Create new construction with all properties generated from the translation table
                for plural in table[key]:
                    pluralEntry = Node.Node(None, "property_init")
                    pluralEntryIdentifier = Node.Node(None, "identifier")
                    pluralEntryIdentifier.value = plural
                    pluralEntryValue = Node.Node(None, "string")
                    pluralEntryValue.value = table[key][plural]
                    pluralEntry.append(pluralEntryIdentifier)
                    pluralEntry.append(pluralEntryValue)
                    container.append(pluralEntry)

                # Replace strings with plus operations to omit complex client side string operation
                if len(params) > 2:
                    for pluralEntry in container:
                        replacement = __splitTemplate(pluralEntry[1].value, params[2:])
                        if replacement:
                            pluralEntry.replace(pluralEntry[1], replacement)

                # When all variables have been patched in all string with placeholder
                # we are able to remove the whole list of placeholder values afterwards
                while len(params) > 2:
                    params.pop()

            Console.outdent()

    return counter
def createReturn(value):
    """
    Builds a "return" node and attaches the given node to it under the
    relation "value".
    """

    statement = Node.Node(value.tokenizer, "return")
    statement.append(value, "value")

    return statement
def __optimize(node, compressor):
    """
    Reduces the given node in place (children first).

    Removes empty and pseudo-empty statements, cleans up parens, pre-computes
    numeric operations, joins string literals, unwraps blocks and rewrites
    if statements into shorter constructs.

    - node: node to optimize
    - compressor: used to compress an operation back to source in order to
      compare its length with the length of the pre-computed result
    """

    # Process from inside to outside
    # on a copy of the node to prevent it from forgetting children when structure is modified
    for child in list(node):
        # None children are allowed sometimes e.g. during array_init like [1,2,,,7,8]
        if child is not None:
            __optimize(child, compressor)

    # Cleans up empty semicolon statements (or pseudo-empty)
    if node.type == "semicolon" and node.parent.type in ("block", "script"):
        expr = getattr(node, "expression", None)
        if not expr or expr.type in ("null", "this", "true", "false", "identifier", "number", "string", "regexp"):
            # Keep strict mode hints
            # Compared by value: an identity check against a string literal
            # only works by accident when both strings happen to be interned.
            if expr and expr.type == "string" and expr.value == "use strict":
                pass
            else:
                if expr is not None:
                    Console.debug("Remove empty statement at line %s of type: %s", expr.line, expr.type)

                node.parent.remove(node)
                return

    # Remove unneeded parens
    if getattr(node, "parenthesized", False):
        cleanParens(node)

    # Pre-compute numeric expressions where it makes sense
    if node.type in ("plus", "minus", "mul", "div", "mod") and node[0].type == "number" and node[1].type == "number":
        firstNumber = node[0]
        secondNumber = node[1]
        operator = node.type

        # Only do for real numeric values and not for protected strings (float differences between Python and JS)
        if isinstance(firstNumber.value, str) or isinstance(secondNumber.value, str):
            pass

        elif operator == "plus":
            Console.debug("Precompute numeric %s operation at line: %s", operator, node.line)
            firstNumber.value += secondNumber.value
            node.parent.replace(node, firstNumber)

        elif operator == "minus":
            Console.debug("Precompute numeric %s operation at line: %s", operator, node.line)
            firstNumber.value -= secondNumber.value
            node.parent.replace(node, firstNumber)

        else:
            # Division and modulo by zero (int or float) raise in Python, so
            # these operations are left untouched
            if operator == "mul":
                result = firstNumber.value * secondNumber.value
            elif operator == "div" and secondNumber.value != 0:
                result = firstNumber.value / secondNumber.value
            elif operator == "mod" and secondNumber.value != 0:
                result = firstNumber.value % secondNumber.value
            else:
                result = None

            # Only use the result when it is shorter than the operation itself
            if result is not None and len(str(result)) < len(compressor.compress(node)):
                Console.debug("Precompute numeric %s operation at line: %s", operator, node.line)
                firstNumber.value = result
                node.parent.replace(node, firstNumber)

    # Pre-combine strings (even supports mixed string + number concats)
    elif node.type == "plus" and node[0].type in ("number", "string") and node[1].type in ("number", "string"):
        Console.debug("Joining strings at line: %s", node.line)
        node[0].value = "%s%s" % (node[0].value, node[1].value)
        node[0].type = "string"
        node.parent.replace(node, node[0])

    # Pre-combine last with last (special case e.g.: somevar + "hello" + "world")
    elif node.type == "plus" and node[0].type == "plus" and node[0][1].type in ("number", "string") and node[1].type in ("number", "string") and node[0][1].type == node[1].type:
        node[1].value = "%s%s" % (node[0][1].value, node[1].value)
        node[1].type = "string"
        node.replace(node[0], node[0][0])

    # Unwrap blocks
    if node.type == "block":
        if node.parent.type in ("try", "catch", "finally"):
            pass

        elif len(node) == 0:
            Console.debug("Replace empty block with semicolon at line: %s", node.line)
            repl = Node.Node(node.tokenizer, "semicolon")
            node.parent.replace(node, repl)
            node = repl

        elif len(node) == 1:
            if node.parent.type == "if" and node.rel == "thenPart" and hasattr(node.parent, "elsePart") and containsIf(node):
                # if with else where the thenBlock contains another if
                pass
            elif node.parent.type == "if" and node.rel == "thenPart" and containsIfElse(node):
                # if without else where the thenBlock contains a if-else
                pass
            elif node.parent.type in ("case", "default"):
                # virtual blocks inside case/default statements
                pass
            else:
                # debug("Removing block for single statement at line %s", node.line)
                node.parent.replace(node, node[0])
                node = node[0]

        else:
            node = combineToCommaExpression(node)

    # Remove "empty" semicolons which are inside a block/script parent
    if node.type == "semicolon":
        if not hasattr(node, "expression"):
            if node.parent.type in ("block", "script"):
                Console.debug("Remove empty semicolon expression at line: %s", node.line)
                node.parent.remove(node)

            elif node.parent.type == "if":
                rel = getattr(node, "rel", None)
                if rel == "elsePart":
                    Console.debug("Remove empty else part at line: %s", node.line)
                    node.parent.remove(node)

    # Process all if-statements
    if node.type == "if":
        condition = node.condition
        thenPart = node.thenPart
        elsePart = getattr(node, "elsePart", None)

        # Optimize for empty thenPart if elsePart is available
        # The condition is inverted and the else part becomes the then part
        if thenPart.type == "semicolon" and not hasattr(thenPart, "expression") and elsePart:
            if condition.type == "not":
                node.replace(condition, condition[0])
                condition = condition[0]
            else:
                repl = Node.Node(None, "not")
                node.replace(condition, repl)
                repl.append(condition)
                fixParens(condition)
                condition = repl

            node.replace(thenPart, elsePart)
            thenPart = elsePart
            elsePart = None

        # Optimize using hook operator
        if elsePart and thenPart.type == "return" and elsePart.type == "return" and hasattr(thenPart, "value") and hasattr(elsePart, "value"):
            # Combine return statement
            replacement = createReturn(createHook(condition, thenPart.value, elsePart.value))
            node.parent.replace(node, replacement)
            return

        # Check whether if-part ends with a return statement. Then
        # We do not need a else statement here and just can wrap the whole content
        # of the else block inside the parent
        if elsePart and endsWithReturnOrThrow(thenPart):
            reworkElse(node, elsePart)
            elsePart = None

        # Optimize using "AND" or "OR" operators
        # Combine multiple semicolon statements into one semicolon statement using an "comma" expression
        thenPart = combineToCommaExpression(thenPart)
        elsePart = combineToCommaExpression(elsePart)

        # Optimize remaining if or if-else constructs
        if elsePart:
            mergeParts(node, thenPart, elsePart, condition, compressor)
        elif thenPart.type == "semicolon":
            compactIf(node, thenPart, condition)
def __createIdentifier(value):
    """Builds a new "identifier" node with the given value."""

    result = Node.Node(None, "identifier")
    result.value = value

    return result
def __createDeclaration(name):
    """
    Builds a new "declaration" node with the given name which is marked as
    not being read-only.
    """

    declaration = Node.Node(None, "declaration")
    declaration.name = name
    declaration.readOnly = False

    return declaration
def __createMultiAssignment(names, valueNode):
    """
    Builds an "assign" node with the given names node as first child and the
    given value node as second child.
    """

    assignment = Node.Node(None, "assign")

    for operand in (names, valueNode):
        assignment.append(operand)

    return assignment