def check(uast):
    """Report ``else if`` branches whose condition repeats an earlier one.

    For every node matched by the top-level ``if`` query (If and Statement
    roles, no Else role) the hash of its CONDITION child is recorded. Each
    nested ``else if`` node whose condition hash was already recorded for
    that chain produces a finding.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts, one per repeated
        condition, positioned at the offending ``else if`` node.
    """
    condition_role = bblfsh.role_id("CONDITION")

    def hash_condition(if_node):
        # Hex digest of the first child carrying the CONDITION role, or None
        # when the node has no such child.
        for child in if_node.children:
            if condition_role in child.roles:
                return utils.hash_node(child).hexdigest()
        return None

    findings = []
    if_nodes = bblfsh.filter(uast, "//*[@roleIf and @roleStatement and not(@roleElse)]")
    for if_node in if_nodes:
        cond_hash = hash_condition(if_node)
        if not cond_hash:
            continue

        condition_hashes = {cond_hash}
        else_nodes = bblfsh.filter(if_node, "//*[@roleElse and @roleIf and @roleStatement]")
        for else_node in else_nodes:
            h = hash_condition(else_node)
            if h is None:
                # Nothing to compare. Recording None would make a second
                # condition-less branch look like a repetition.
                continue
            if h in condition_hashes:
                findings.append({"msg": "Else condition repeated",
                                 "pos": else_node.start_position})
            else:
                condition_hashes.add(h)

    return findings
def testFilterProperties(self):
    """A property query matches only the exact key/value pairs set on the node."""
    node = Node()
    for key, value in (('k1', 'v2'), ('k2', 'v1')):
        node.properties[key] = value

    # Both stored pairs must be found ...
    self.assertTrue(any(filter(node, "//*[@k2='v1']")))
    self.assertTrue(any(filter(node, "//*[@k1='v2']")))
    # ... and a key paired with the other key's value must not.
    self.assertFalse(any(filter(node, "//*[@k1='v1']")))
def check(uast):
    """Report private nested classes never instantiated in their enclosing type.

    For each type declaration, the names used in ``ClassInstanceCreation``
    nodes are collected. Every nested ``TypeDeclaration`` that has a
    ``private`` Modifier child and whose name is not among those names
    yields a finding.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        nested class node.
    """
    findings = []

    type_nodes = bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]")
    outer_classes = [utils.JClass(type_node) for type_node in type_nodes]

    for outer in outer_classes:
        created_types = {
            ident.properties["Name"]
            for ident in bblfsh.filter(
                outer.node, "//ClassInstanceCreation/SimpleType/Identifier")
        }

        nested_nodes = bblfsh.filter(outer.node, "//TypeDeclaration//TypeDeclaration")
        nested_classes = [utils.JClass(nested_node) for nested_node in nested_nodes]

        for nested in nested_classes:
            for member in nested.node.children:
                is_private_modifier = (member.internal_type == "Modifier"
                                       and member.token == "private")
                if is_private_modifier and nested.name not in created_types:
                    message = "Private class {} defined but not used, remove it".format(
                        nested.name)
                    findings.append({"msg": message, "pos": nested.node.start_position})

    return findings
def __init__(self, node: bblfsh.Node) -> None:
    """Populate the class description from a type declaration node.

    Reads the direct children of ``node`` and dispatches on their
    ``internalRole`` property to fill in the name, the dotted parent type,
    the dotted names of implemented interfaces and the raw body
    declarations. Fields are taken from every ``FieldDeclaration`` found
    under ``node``; methods come from ``get_methods(node)``.

    Args:
        node: the node describing the class.
    """
    def dotted_name(root) -> str:
        # Join the Name of every Identifier found under ``root`` with dots.
        identifiers = bblfsh.filter(root, "//Identifier")
        return '.'.join([ident.properties["Name"] for ident in identifiers])

    self.node = node
    self.name = ''
    self.parent = ''
    self.methods: List[Method] = []
    self.implements: List[str] = []
    self.body_declarations: List[bblfsh.Node] = []

    self.fields = [JClassField(field_node)
                   for field_node in bblfsh.filter(node, "//FieldDeclaration")]

    for child in node.children:
        role = child.properties["internalRole"]
        if role == "name":
            self.name = child.properties["Name"]
        elif role == "superclassType":
            self.parent = dotted_name(child)
        elif role == "superInterfaceTypes":
            for iface in child.children:
                self.implements.append(dotted_name(iface))
        elif role == "bodyDeclarations":
            self.body_declarations.append(child)

    self.methods = get_methods(node)
def testManyFilters(self):
    """Calling the same filter 100 times must not double ``ru_maxrss``."""
    root = self.client.parse(__file__).uast
    root.properties['k1'] = 'v2'
    root.properties['k2'] = 'v1'

    import resource

    # ru_maxrss is field index 2 of the getrusage() result.
    rss_before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    for _ in range(100):
        filter(root, "//*[@roleIdentifier]")
    rss_after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    # Check that memory usage has not doubled after running the filter
    self.assertLess(rss_after / rss_before, 2.0)
def check(uast):
    """Report ``throw`` and ``return`` statements found inside ``finally`` blocks.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts, one per Throw/Return
        node found under a node carrying the Finally role.
    """
    findings = []
    for finally_node in bblfsh.filter(uast, "//*[@roleFinally]"):
        # Search below the finally node only. Querying the whole tree would
        # flag every throw/return in the file once per finally block.
        offenders = bblfsh.filter(finally_node, "//*[@roleThrow or @roleReturn]")
        for stmt in offenders:
            findings.append({
                # The location travels in "pos"; the message has no placeholder.
                "msg": "Don't throw or return inside a finally",
                "pos": stmt.start_position
            })
    return findings
def check(uast):
    """Report switch cases under which no Case+Break node is found.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at each
        ``SwitchCase`` for which the break query returns nothing.
    """
    findings = []
    for switch in bblfsh.filter(uast, "//SwitchStatement"):
        for case in bblfsh.filter(switch, "//SwitchCase"):
            break_nodes = list(bblfsh.filter(case, "//*[@roleCase and @roleBreak]"))
            if not break_nodes:
                findings.append({"msg": "Switch without break",
                                 "pos": case.start_position})
    return findings
def check(uast):
    """Check ``readResolve()`` on classes that implement ``Serializable``.

    A method counts as ``readResolve()`` when it has that name, a return
    type named ``Object`` and no arguments. For each matching class:

    * if such a method exists and is ``private``, a finding asks for a
      public or protected one;
    * if no such method exists, a finding asks for one to be added;
    * if it exists and is not private, nothing is reported.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the class.
    """
    findings = []

    cl_nodes = bblfsh.filter(
        uast, "//TypeDeclaration[@roleDeclaration and @roleType]//SimpleType"
        "[@internalRole='superInterfaceTypes']//Identifier[@Name='Serializable']/"
        "parent::*/parent::TypeDeclaration")

    for cl in cl_nodes:
        jc = utils.JClass(cl)

        for method in jc.methods:
            is_read_resolve = (method.name == 'readResolve'
                               and method.return_
                               and method.return_.type_name == 'Object'
                               and not method.arguments)
            if not is_read_resolve:
                continue

            if 'private' in method.modifiers:
                findings.append({
                    "msg": "Class {} implementing Serializable should have a public or protected "
                           "readResolve() method".format(jc.name),
                    "pos": cl.start_position
                })
            # Stop at the first readResolve() whatever its visibility, so the
            # for-else below only fires when the method is really missing.
            break
        else:
            findings.append({
                "msg": "Class {} implementing Serializable should have a readResolve() method"
                       .format(jc.name),
                "pos": cl.start_position
            })

    return findings
def check(uast):
    """Report classes declaring a field with the same name as a parent field.

    Only parents declared in the same tree are considered: the parent name
    is looked up among the classes found under ``uast``.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ...}`` dicts (no ``"pos"`` key), one per class
        that shares at least one field name with its parent.
    """
    name2class = {}
    for type_node in bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]"):
        jc = utils.JClass(type_node)
        name2class[jc.name] = jc

    findings = []
    for cl in name2class.values():
        parent = name2class.get(cl.parent) if cl.parent else None
        if not parent:
            continue

        own_names = {field.name for field in cl.fields}
        parent_names = {field.name for field in parent.fields}
        common = own_names & parent_names
        if common:
            message = 'Class {} uses field(s) with same name as parent {}: {}'.format(
                cl.name, parent.name, common)
            findings.append({"msg": message})

    return findings
def check(uast):
    """Report subclass methods whose name differs from a parent method only by case.

    Only parents declared in the same tree are considered: the parent name
    is looked up among the classes found under ``uast``.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ...}`` dicts (no ``"pos"`` key), one per
        (subclass method, parent method) pair with different spelling but
        equal lower-cased names.
    """
    name2class = {}
    for type_node in bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]"):
        jc = utils.JClass(type_node)
        name2class[jc.name] = jc

    findings = []
    for clname, cl in name2class.items():
        if not cl.parent:
            continue
        par_class = name2class.get(cl.parent)
        if not par_class:
            continue

        for method in cl.methods:
            lowered = method.name.lower()
            for parmethod in par_class.methods:
                if parmethod.name == method.name:
                    continue
                if parmethod.name.lower() == lowered:
                    findings.append({
                        "msg": "Methods with same name but different casing in subclass: "
                               "{}.{} and {}.{}".format(clname, method.name,
                                                        cl.parent, parmethod.name)
                    })

    return findings
def analyze(self, ptr_from: ReferencePointer, ptr_to: ReferencePointer,  # noqa: D
            data_service: DataService, changes: Iterable[Change]) -> [Comment]:
    """Suggest spelling fixes for identifier parts on newly added lines.

    The names of ``self.model`` that are at least 3 characters long are
    added to ``autocorrect.word.KNOWN_WORDS`` for the duration of the call;
    the previous word set is restored in a ``finally`` clause. For each
    change, identifiers located on new lines are split with a
    ``TokenParser``; every part that is not a model name and that
    ``autocorrect.spell`` would change is collected per line.

    Args:
        ptr_from: base reference; only its ``commit`` is logged.
        ptr_to: head reference; only its ``commit`` is logged.
        data_service: not used by this method.
        changes: the changes to analyze.

    Returns:
        One ``Comment`` per line with suggestions, with confidence 100 and
        one "`token`: part > fixed" entry per suggestion.
    """
    self._log.info("analyze %s %s", ptr_from.commit, ptr_to.commit)
    comments = []
    parser = TokenParser(stem_threshold=100, single_shot=True)
    # Snapshot taken before the set is extended, restored in ``finally``.
    words = autocorrect.word.KNOWN_WORDS.copy()
    try:
        for name in self.model.names:
            if len(name) >= 3:
                autocorrect.word.KNOWN_WORDS.add(name)

        for change in changes:
            suggestions = defaultdict(list)
            new_lines = set(find_new_lines(change.base, change.head))

            for node in bblfsh.filter(change.head.uast, "//*[@roleIdentifier]"):
                position = node.start_position
                if position is None or position.line not in new_lines:
                    continue
                for part in parser.split(node.token):
                    if part in self.model.names:
                        continue
                    fixed = autocorrect.spell(part)
                    if fixed != part:
                        suggestions[node.start_position.line].append(
                            (node.token, part, fixed))

            for line, fixes in suggestions.items():
                comment = Comment()
                comment.file = change.head.path
                comment.text = "\n".join("`%s`: %s > %s" % fix for fix in fixes)
                comment.line = line
                comment.confidence = 100
                comments.append(comment)
    finally:
        autocorrect.word.KNOWN_WORDS = words
    return comments
def check(uast):
    """Report binary expressions whose left and right operands hash equally.

    Expressions are skipped when a child whose token is ``=``, ``*`` or
    ``+`` is met before both operands have been found.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        expression node.
    """
    left_role = bblfsh.role_id("LEFT")
    right_role = bblfsh.role_id("RIGHT")
    skipped_tokens = ["=", "*", "+"]

    findings = []
    for expr in bblfsh.filter(uast, "//InfixExpression[@roleBinary and @roleExpression]"):
        left = right = None
        for operand in expr.children:
            if left_role in operand.roles:
                left = operand
            elif right_role in operand.roles:
                right = operand
            elif operand.token in skipped_tokens:
                # Excluded operator: forget what was collected and give up.
                left = right = None
                break

            if left and right:
                break

        if not (left and right):
            continue

        left_digest = utils.hash_node(left).hexdigest()
        right_digest = utils.hash_node(right).hexdigest()
        if left_digest == right_digest:
            findings.append({"msg": "Equal terms on both sides of binary expression, ",
                             "pos": expr.start_position})

    return findings
def check(uast):
    """Report methods of a class whose body hash equals an earlier method's.

    Within each class the first method seen for a given body hash is
    remembered; every later method with the same hash yields a finding
    positioned at that first method.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts.
    """
    type_nodes = bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]")
    jclasses = [utils.JClass(type_node) for type_node in type_nodes]

    findings = []
    for cl in jclasses:
        first_with_hash = {}
        for method in cl.methods:
            body_hash = utils.hash_node(method.body).hexdigest()
            if body_hash not in first_with_hash:
                first_with_hash[body_hash] = method
                continue

            original = first_with_hash[body_hash]
            message = ("Method {m1_name} has the same implementation as method {m2_name}"
                       .format(m1_name=original.name, m2_name=method.name))
            findings.append({"msg": message, "pos": original.node.start_position})

    return findings
def _validate_filter(self, resp):
    """Assert that ``resp.uast`` is a ``Node`` and that the first four ``//Num`` tokens are 0, 1, 100, 10."""
    results = filter(resp.uast, "//Num")
    self.assertIsInstance(resp.uast, Node)
    # Results are consumed one at a time, in the order the filter yields them.
    for expected_token in ("0", "1", "100", "10"):
        self.assertEqual(next(results).token, expected_token)
def check(uast):
    """Report modifications of a ``for`` loop variable inside the loop body.

    The loop variable is the ``Name`` of the first ``leftOperand``
    Identifier under the loop's ``expression`` child. Postfix expressions,
    prefix expressions and assignment left-hand sides on that name inside
    a ``Block`` of the loop are reported, in that order.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        identifier being modified.
    """
    # Query templates for the three kinds of modification, in report order.
    modification_queries = (
        "//Block//PostfixExpression/Identifier[@Name='{}']",
        "//Block//PrefixExpression/Identifier[@Name='{}']",
        "//Block//Assignment//Identifier[@internalRole='leftHandSide' "
        "and @Name='{}']",
    )

    def loop_variable(for_node):
        # Name of the loop variable, or None when it cannot be determined.
        # If several "expression" children exist, the last one wins.
        name = None
        for child in for_node.children:
            if child.properties["internalRole"] != "expression":
                continue
            for ident in bblfsh.filter(child, "//Identifier"):
                if ident.properties["internalRole"] == "leftOperand":
                    name = ident.properties["Name"]
                    break
        return name

    findings = []
    for fn in bblfsh.filter(uast, "//ForStatement"):
        # Resolved afresh for every loop so a name never leaks from a
        # previous ``for`` statement.
        for_var = loop_variable(fn)
        if for_var is None:
            # Without a variable name there is nothing to search for.
            continue

        for query in modification_queries:
            for identifier in bblfsh.filter(fn, query.format(for_var)):
                findings.append({
                    "msg": "Loop variable {} modified".format(for_var),
                    "pos": identifier.start_position
                })

    return findings
def check(uast):
    """Report simple getters/setters that touch a differently named variable.

    Only methods whose body has at most one child are inspected. For a
    method named ``getXxx`` every identifier under a Return node is
    compared, case-insensitively, with ``xxx``. For ``setXxx`` the same is
    done with the identifiers on the left side of assignments.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        mismatching identifier.
    """
    findings = []

    for cl in bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]"):
        jc = utils.JClass(cl)

        for method in jc.methods:
            # NOTE(review): assumes a method without a body exposes
            # ``body`` as None -- confirm against utils.Method.
            if method.body is None:
                continue

            # Avoid complex getters/setters
            if len(list(method.body.children)) > 1:
                continue

            m_name = method.name.lower()

            if m_name.startswith("get"):
                # Already lower-cased, since it is sliced from m_name.
                should_return = m_name[3:]
                returns = bblfsh.filter(method.body, "//*[@roleReturn]//Identifier")

                for r in returns:
                    returned_var = r.properties["Name"]
                    if returned_var and returned_var.lower() != should_return:
                        findings.append({
                            "msg": "Getter '{}' probably should return '{}' instead of '{}'"
                                   .format(method.name, should_return, returned_var),
                            "pos": r.start_position
                        })

            elif m_name.startswith("set"):
                should_assign = m_name[3:]
                assgns = bblfsh.filter(method.body,
                                       "//Assignment//Identifier[@roleLeft]|"
                                       "//Assignment//*[@roleLeft]//Identifier")

                for a in assgns:
                    assigned_var = a.properties["Name"]
                    if assigned_var and assigned_var.lower() != should_assign:
                        findings.append({
                            "msg": "Setter '{}' probably should assign to '{}' instead of '{}'"
                                   .format(method.name, should_assign, assigned_var),
                            "pos": a.start_position
                        })

    return findings
def check(uast):
    """Report ``default`` switch cases that are not the last case of their switch.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at each
        ``SwitchCase`` carrying the DEFAULT role that is followed by
        another case.
    """
    default_role = bblfsh.role_id('DEFAULT')

    findings = []
    for switch in bblfsh.filter(uast, "//SwitchStatement"):
        cases = list(bblfsh.filter(switch, "//SwitchCase"))
        # Every case except the last one is a misplaced position for 'default'.
        for case in cases[:-1]:
            if default_role in case.roles:
                findings.append({"msg": "'default' should be the last switch case",
                                 "pos": case.start_position})

    return findings
def check(uast):
    """Report switch statements under which no case carries the DEFAULT role.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        switch statement.
    """
    findings = []
    for switch in bblfsh.filter(uast, "//SwitchStatement"):
        has_default = any(
            bblfsh.role_id("DEFAULT") in case.roles
            for case in bblfsh.filter(switch, "//SwitchCase")
        )
        if not has_default:
            findings.append({"msg": "Switch without default case",
                             "pos": switch.start_position})
    return findings
def __init__(self, node: bblfsh.Node) -> None:
    """Populate the field description from a field declaration node.

    The name comes from the first ``VariableDeclarationFragment/Identifier``
    found, the modifiers from the tokens of all ``Modifier`` nodes, and the
    type name from the first Identifier of the first Type-role node, only
    when that node's internal type is ``SimpleType``.

    Args:
        node: the node describing the field.

    Raises:
        IndexError: when one of the required queries returns no node.
    """
    self.node = node
    self.name = ''
    self.modifiers: List[str] = []
    self.type_name = ''

    name_nodes = list(bblfsh.filter(node, "//VariableDeclarationFragment/Identifier"))
    self.name = name_nodes[0].properties["Name"]

    self.modifiers.extend(modifier.token for modifier in bblfsh.filter(node, "//Modifier"))

    type_nodes = list(bblfsh.filter(node, "//*[@roleType]"))
    type_node = type_nodes[0]
    if type_node.internal_type == "SimpleType":
        type_identifiers = list(bblfsh.filter(type_node, "//Identifier"))
        self.type_name = type_identifiers[0].properties["Name"]
def check(uast):
    """Report ``main`` functions declared inside classes extending ``HttpServlet``.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at each
        ``main`` identifier found under a class whose parent is
        ``HttpServlet``.
    """
    findings = []

    cl_nodes = bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]")
    jclasses = [utils.JClass(i) for i in cl_nodes]

    for jc in jclasses:
        if jc.parent != 'HttpServlet':
            continue

        # Search the servlet class only. Querying the whole tree would
        # flag main() of unrelated classes, once per servlet class.
        mains = bblfsh.filter(
            jc.node, "//FunctionGroup//Alias/Identifier[@Name='main']")
        for m in mains:
            findings.append({
                "msg": "Don't use a main() function on HttpServlet derived classes",
                "pos": m.start_position
            })

    return findings
def check(uast):
    """Report ``Arrays.asList`` invocations that contain a literal argument.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        invocation node.
    """
    as_list_query = (
        "//MethodInvocation/"
        "Identifier[@roleCall and @roleReceiver and @Name='Arrays']/parent::MethodInvocation/"
        "Identifier[@roleCall and @roleCallee and @Name='asList']/parent::MethodInvocation"
    )

    findings = []
    for invocation in bblfsh.filter(uast, as_list_query):
        literal_args = list(bblfsh.filter(invocation, "//*[@roleArgument and @roleLiteral]"))
        if literal_args:
            findings.append({
                "msg": "Don't use slow Arrays.asList with primitives",
                "pos": invocation.start_position
            })
    return findings
def check(uast):
    """Report greater-than comparisons of an ``indexOf`` call against literal ``0``.

    For every ``indexOf`` identifier found under a greater-than infix
    expression, each ``0`` number literal in ``rightOperand`` position
    under that expression yields a finding.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        zero literal.
    """
    findings = []
    for comparison in bblfsh.filter(uast, "//InfixExpression[@roleGreaterThan]"):
        index_of_calls = bblfsh.filter(
            comparison, "//MethodInvocation//Identifier[@Name='indexOf']")
        for _ in index_of_calls:
            zero_literals = bblfsh.filter(
                comparison, "//NumberLiteral[@internalRole='rightOperand' and @token='0']")
            findings.extend(
                {
                    "msg": "indexOf greater than zero ignores the first field",
                    "pos": zero.start_position
                }
                for zero in zero_literals
            )
    return findings
def check(uast):
    """Report string concatenations whose left operand starts with an SQL command.

    A finding is produced for each ``String`` node in ``leftOperand``
    position under an Add infix expression whose first whitespace-separated
    word is one of SELECT, UPDATE, DELETE, INSERT, CREATE, ALTER or DROP
    (exact, upper-case match).

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        string node.
    """
    sql_commands = {"SELECT", "UPDATE", "DELETE", "INSERT", "CREATE", "ALTER", "DROP"}

    findings = []
    infixes = bblfsh.filter(
        uast, "//InfixExpression[@roleAdd and @roleBinary and @roleOperator]")
    for infix in infixes:
        for s in bblfsh.filter(infix, "//String[@internalRole='leftOperand']"):
            words = s.properties["Value"].split()
            # An empty or whitespace-only literal (e.g. ``"" + x``) has no
            # first word; indexing it unconditionally would raise IndexError.
            if words and words[0] in sql_commands:
                findings.append({
                    "msg": "Potential SQL injection vulnerability",
                    "pos": s.start_position
                })

    return findings
def check(uast):
    """Report use of ``System.gc()`` and ``Runtime.getRuntime().gc()``.

    At most one finding per kind of call is produced, however many calls
    exist; both findings carry ``"pos": None``.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list with zero, one or two ``{"msg": ..., "pos": None}`` dicts.
    """
    findings = []

    system_gc_calls = bblfsh.filter(
        uast, "//MethodInvocation//"
        "Identifier[@roleCall and @roleReceiver and @Name='System']/parent::MethodInvocation/"
        "Identifier[@roleCall and @roleCallee and @Name='gc']/parent::MethodInvocation")
    if list(system_gc_calls):
        findings.append({"msg": "Don't use System.gc()", "pos": None})

    runtime_gc_calls = bblfsh.filter(
        uast, "//MethodInvocation//"
        "Identifier[@roleCall and @roleReceiver and @Name='Runtime']/parent::MethodInvocation//"
        "Identifier[@roleCall and @roleCallee and @Name='getRuntime']/parent::MethodInvocation/parent::MethodInvocation//"
        "Identifier[@roleCall and @roleCallee and @Name='gc']/parent::MethodInvocation")
    if list(runtime_gc_calls):
        findings.append({"msg": "Don't use Runtime.getRuntime().gc()", "pos": None})

    return findings
def search_usages(instance_node, search_node):
    """Collect calls of ``method_name`` made on variables instantiated as ``type_name``.

    ``type_name`` and ``method_name`` are not parameters; they are read
    from the enclosing scope.

    Args:
        instance_node: node searched for variable declarations that are
            initialised with a ``type_name`` instance creation.
        search_node: node searched for calls whose receiver is one of those
            variables and whose callee is ``method_name``.

    Returns:
        A list with the callee Identifier nodes of every matching call.
    """
    declared_var_query = "".join((
        "//VariableDeclarationFragment/ClassInstanceCreation",
        "/SimpleType/Identifier[@Name='%s']" % type_name,
        "/ancestor::VariableDeclarationFragment/Identifier",
    ))

    # The receiver name stays a %s placeholder and is filled in per variable.
    callee_clause = "@Name='{}']".format(method_name)
    usage_template = (
        "//*[@roleCall and @roleReceiver and @Name='%s']/"
        "parent::*/Identifier[@roleCall and @roleCallee and "
    ) + callee_clause

    found = []
    for declared_var in bblfsh.filter(instance_node, declared_var_query):
        receiver_name = declared_var.properties["Name"]
        found.extend(bblfsh.filter(search_node, usage_template % receiver_name))
    return found
def check(uast):
    """Report references made by a parent class to members of its own subclasses.

    Classes found under ``uast`` are grouped by parent name. For each
    parent that is declared in the same tree, its node is searched for
    method-call receivers and qualified identifiers named after one of its
    children.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at the
        offending identifier.
    """
    findings = []
    parent2children = {}
    name2node = {}

    for cl in bblfsh.filter(uast, "//*[@roleDeclaration and @roleType]"):
        jc = utils.JClass(cl)
        name2node[jc.name] = cl
        if jc.parent:
            # Keyed by the parent, listing each child under it.
            parent2children.setdefault(jc.parent, []).append(jc.name)

    for parent_name, children in parent2children.items():
        parent_node = name2node.get(parent_name)
        if parent_node is None:
            # Parent declared elsewhere; nothing to search.
            continue

        for child in children:
            # Alternative: generate a string with all the child names in the Identifier selector
            calls = bblfsh.filter(
                parent_node,
                "(//MethodInvocation//Identifier[@roleCall and @roleReceiver and @Name='%s']|"
                "//QualifiedIdentifier//Identifier[@Name='%s'])" % (child, child))

            for call in calls:
                findings.append({
                    "msg": "Call in class {} to subclass {} member".format(parent_name, child),
                    # The full position object, as in the other checks.
                    "pos": call.start_position
                })

    return findings
def check(uast):
    """Report the presence of a node carrying the Bitwise, Condition and If roles together.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list holding a single ``{"msg": ...}`` dict (no ``"pos"`` key)
        when at least one such node exists, otherwise an empty list.
    """
    bitwise_conditions = list(
        bblfsh.filter(uast, "//*[@roleBitwise and @roleCondition and @roleIf]"))
    if not bitwise_conditions:
        return []
    return [{"msg": "Potential bug: bitwise operator inside if condition"}]
def check(uast):
    """Validate ``String.format`` calls whose format string is a literal.

    Only ``%d`` specifiers are validated. Two kinds of findings are
    produced per call:

    * the number of ``%d`` specifiers differs from the number of remaining
      arguments;
    * a remaining argument does not carry the NUMBER role.

    Calls whose first argument is not a ``String`` node are skipped.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts.
    """
    findings = []
    number_role = bblfsh.role_id('NUMBER')

    format_calls = bblfsh.filter(
        uast, "//MethodInvocation/"
        "Identifier[@roleCall and @roleReceiver and @Name='String']/parent::MethodInvocation/"
        "Identifier[@roleCall and @roleCallee and @Name='format']/parent::MethodInvocation"
    )

    for fcall in format_calls:
        args = list(bblfsh.filter(fcall, "//*[@internalRole='arguments']"))
        if not args:
            continue

        format_str = args[0]
        if format_str.internal_type != 'String':
            # Validating format strings assigned elsewhere on the same file is possible,
            # but won't be doing it here for brevity sake
            continue

        # For the reason stated above, we only validate %d
        str_val = format_str.properties["Value"]
        # Consume "%%" (a literal percent sign) as its own token so it is
        # never mistaken for the start of a specifier. This also counts
        # "%d" at the very start of the string or right after another
        # specifier.
        specifiers = [tok for tok in re.findall(r'%%|%d', str_val) if tok == '%d']
        value_args = args[1:]

        # Validate number of args
        if len(specifiers) != len(value_args):
            findings.append({
                "msg": "Format string doesn't match number of args",
                "pos": format_str.start_position
            })

        # Validate type of args (for %d it should have the NumberLiteral role)
        for arg in value_args:
            if number_role not in arg.roles:
                findings.append({
                    "msg": "Format string argument is not numeric",
                    "pos": arg.start_position
                })

    return findings
def check(uast):
    """Report ``substring`` calls found under ``indexOf``/``lastIndexOf``/``startsWith`` invocations.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts positioned at each
        ``substring`` callee identifier located under one of those
        invocations.
    """
    findings = []

    calls = bblfsh.filter(
        uast, "//MethodInvocation/Identifier"
        "[@roleCall and @roleCallee and (@Name='indexOf' or "
        "@Name='lastIndexOf' or @Name='startsWith')]/parent::MethodInvocation")

    for c in calls:
        substr_calls = bblfsh.filter(
            c, "//MethodInvocation/Identifier[@roleCall "
            "and @roleCallee and @Name='substring']")
        for sc in substr_calls:
            findings.append({
                "msg": "Don't use indexOf, lastIndexOf or startsWith chained with substring",
                "pos": sc.start_position
            })

    return findings
def check(uast):
    """Report nodes matched by the repeated-unary-operator query.

    The query starts from ``=`` operator nodes, moves to their parent and
    selects nodes below it that have an operator child located directly
    under a ``rightHandSide`` node.

    Args:
        uast: root node searched with ``bblfsh.filter``.

    Returns:
        A list of ``{"msg": ..., "pos": ...}`` dicts, one per matched node.
    """
    query = "".join((
        "//*[@roleOperator and @token='=' and @internalRole='operator']",
        "/parent::*/",
        "/*[@internalRole='rightHandSide']",
        "/*[@internalRole='operator' and @roleOperator]",
        "/parent::*",
    ))
    return [
        {"msg": "Don't repeat Unary operators", "pos": match.start_position}
        for match in bblfsh.filter(uast, query)
    ]