def testFilterProperties(self):
    """Custom node properties are exposed as XPath attributes."""
    subject = Node()
    subject.properties['k1'] = 'v2'
    subject.properties['k2'] = 'v1'
    # Both stored properties must be reachable through attribute queries.
    for query in ("//*[@k2='v1']", "//*[@k1='v2']"):
        self.assertTrue(any(filter(subject, query)))
    # A key/value mismatch must not match.
    self.assertFalse(any(filter(subject, "//*[@k1='v1']")))
def test_extract_changed_nodes(self):
    """Only nodes whose start line is in the changed-lines list are kept."""
    untouched_child = Node(start_position=Position(line=5))
    root = Node(start_position=Position(line=10), children=[untouched_child])
    changed = extract_changed_nodes(root, [10])
    self.assertEqual(len(changed), 1)
    self.assertEqual(changed[0].start_position.line, 10)
def test_files_by_language(self):
    """Files are grouped by language, with language keys lower-cased."""
    file_stats = {"js": 2, "ruby": 7, "Python": 5}
    files = [
        File(language=lang, uast=Node(children=[Node()]), path=lang + str(i))
        for lang, n_files in file_stats.items()
        for i in range(n_files)
    ]
    result = files_by_language(files)
    observed = [(lang, len(group)) for lang, group in result.items()]
    self.assertEqual([("python", 5), ("js", 2), ("ruby", 7)], observed)
    # NOTE(review): the grouping is returned — presumably reused as a
    # fixture by another test; verify before removing.
    return result
def fix_regexp_node(node: bblfsh.Node) -> bblfsh.Node:
    """
    Workaround https://github.com/bblfsh/javascript-driver/issues/37.

    Should be removed as soon as issue closed and new driver is used.

    :param node: regexp node to patch in place.
    :return: the same node, with its token taken from the "pattern" property.
    """
    pattern = node.properties["pattern"]
    node.token = pattern
    return node
def fix_string_literal_type_anotation(node: bblfsh.Node) -> bblfsh.Node:
    """
    Workaround https://github.com/bblfsh/javascript-driver/issues/66.

    Should be removed as soon as issue closed and new driver is used.

    :param node: string-literal type annotation node to patch in place.
    :return: the same node, with an empty token backfilled from "value".
    """
    # Protobuf string fields are always str, so falsiness == empty string.
    if not node.token:
        node.token = node.properties["value"]
    return node
def fix_operator_node(node: bblfsh.Node) -> bblfsh.Node:
    """
    Workaround https://github.com/bblfsh/javascript-driver/issues/65.

    Should be removed as soon as issue closed and new driver is used.

    :param node: operator node to patch in place.
    :return: the same node; its token is blanked when both positions are all-zero.
    """
    start, end = node.start_position, node.end_position
    total = (start.offset + start.col + start.line +
             end.offset + end.col + end.line)
    if total == 0:
        node.token = ""
    return node
def main():
    """
    Load Python-file UASTs from a siva repository dump via the source{d}
    engine and run the rule-extraction pipeline on them.

    Fixes over the previous version: removed commented-out dead code and an
    `Engine` instance that was built with a hard-coded local path and then
    immediately discarded by reassignment; removed the unused `data` local.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--data', type=str, help="Path of the data.",
                        required=True)
    parser.add_argument('-o', '--output', type=str,
                        help="Path output to save the data.", required=False)
    args = parser.parse_args()
    spark = SparkSession.builder \
        .master("local[*]").appName("Examples") \
        .getOrCreate()
    engine = Engine(spark, args.data, "siva")
    # NOTE(review): the count is halved — presumably each repository is
    # listed twice by the engine; confirm against the engine's docs.
    print("%d repositories successfully loaded" % (engine.repositories.count() / 2))
    binary_uasts = engine.repositories.references.head_ref.commits.tree_entries.blobs \
        .classify_languages().where('lang = "Python"') \
        .extract_uasts().select('path', 'uast').rdd.filter(lambda r: len(r['uast']) > 0).collect()
    uasts = [Node.FromString(row["uast"][0]) for row in binary_uasts]
    # Release the raw serialized blobs before the heavy processing step.
    del binary_uasts
    rules_count, nodes_count = process_uasts(uasts)
    print_statistics(rules_count, nodes_count)
    cluster_nodes(nodes_count)
def test_is_relevant_node():
    """A node is relevant iff its [start, end] line span meets the lines set."""
    node = Node()
    node.start_position.line = 1
    node.end_position.line = 1
    for lines in ({1}, {1, 2}, {1, 3}):
        assert is_relevant_node(node, lines=lines)
    for lines in ({2}, {2, 3}):
        assert not is_relevant_node(node, lines=lines)
    # A multi-line span is relevant if any covered line is requested.
    node.start_position.line = 2
    node.end_position.line = 4
    for lines in ({2}, {4}, {3}):
        assert is_relevant_node(node, lines=lines)
    assert not is_relevant_node(node, lines={1, 5})
def convert_uast(self, uast: bblfsh.Node) -> bblfsh.Node:
    """
    Convert uast Nodes bytes position to unicode position.

    UAST is expected to correspond to provided content.
    :param uast: corresponding UAST.
    :return: UAST with unicode positions.
    """
    # Serialize/parse round-trip yields an independent deep copy of the tree.
    converted_tree = bblfsh.Node.FromString(uast.SerializeToString())
    if not self._content:
        return converted_tree
    for node in self._traverse_uast(converted_tree):
        for pos in (node.start_position, node.end_position):
            # An all-zero position means "absent"; leave it untouched.
            if pos.offset == pos.col == pos.line == 0:
                continue
            fixed = self._convert_position(pos)
            pos.offset = fixed.offset
            pos.line = fixed.line
            pos.col = fixed.col
    return converted_tree
def testFilterStartCol(self):
    """The @startCol attribute matches the node's start column."""
    subject = Node()
    subject.start_position.col = 50
    for query, expected in (("//*[@startCol=50]", True),
                            ("//*[@startCol=5]", False)):
        self.assertEqual(any(filter(subject, query)), expected)
def test_extract_subtrees_all_positions():
    """extract_subtrees honours depth limits, size limits and line filters."""
    def positioned(internal_type, start_line, end_line):
        # Small factory to keep the fixture construction readable.
        n = Node()
        n.internal_type = internal_type
        n.start_position.line = start_line
        n.end_position.line = end_line
        return n

    root = positioned('root', 1, 4)
    child1 = positioned('1', 1, 1)
    child2 = positioned('2', 2, 3)
    child2a = positioned('2a', 2, 2)
    child2b = positioned('2b', 3, 3)
    child3 = positioned('3', 4, 4)
    child2.children.extend([child2a, child2b])
    root.children.extend([child1, child2, child3])

    paths = list(extract_paths(root, lines={1}))
    assert len(paths) == 4

    subtrees = list(extract_subtrees(root, min_depth=1, max_depth=1,
                                     min_size=1, max_size=100, lines={3}))
    assert subtrees == [child2b]

    subtrees = list(extract_subtrees(root, min_depth=1, max_depth=1,
                                     min_size=1, max_size=100, lines={4}))
    assert subtrees == [child3]

    subtrees = list(extract_subtrees(root, min_depth=1, max_depth=2,
                                     min_size=1, max_size=100, lines={3}))
    assert len(subtrees) == 2
    assert child2b in subtrees
    assert child2 in subtrees

    subtrees = list(extract_subtrees(root, min_depth=1, max_depth=100,
                                     min_size=1, max_size=3, lines={3}))
    assert len(subtrees) == 2
    assert child2b in subtrees
    assert child2 in subtrees

    subtrees = list(extract_subtrees(root, min_depth=1, max_depth=100,
                                     min_size=1, max_size=2, lines={3}))
    assert len(subtrees) == 1
    assert child2b in subtrees

    # Bury the whole tree three wrappers deep; the same query must still
    # find the same subtrees.
    wrapper_outer, wrapper_mid, wrapper_inner = Node(), Node(), Node()
    wrapper_inner.children.extend([root])
    wrapper_mid.children.extend([wrapper_inner])
    wrapper_outer.children.extend([wrapper_mid])
    subtrees = list(extract_subtrees(wrapper_outer, min_depth=1, max_depth=2,
                                     min_size=1, max_size=100, lines={3}))
    assert len(subtrees) == 2
    assert child2b in subtrees
    assert child2 in subtrees
def testFilterStartOffset(self):
    """The @startOffset attribute matches the node's start offset."""
    subject = Node()
    subject.start_position.offset = 100
    hits = any(filter(subject, "//*[@startOffset=100]"))
    self.assertTrue(hits)
    self.assertFalse(any(filter(subject, "//*[@startOffset=10]")))
def testFilterStartLine(self):
    """The @startLine attribute matches the node's start line."""
    subject = Node()
    subject.start_position.line = 10
    for query, expected in (("//*[@startLine=10]", True),
                            ("//*[@startLine=100]", False)):
        self.assertEqual(any(filter(subject, query)), expected)
def testFilterRoles(self):
    """Role membership is queryable through @role<Name> attributes."""
    subject = Node()
    # Role id 1 is matched by @roleIdentifier below — presumably the
    # Identifier role; confirm against the bblfsh role enum.
    subject.roles.append(1)
    self.assertTrue(any(filter(subject, "//*[@roleIdentifier]")))
    self.assertFalse(any(filter(subject, "//*[@roleQualified]")))
def testFilterEndLine(self):
    """The @endLine attribute matches the node's end line."""
    subject = Node()
    subject.end_position.line = 10
    matched = any(filter(subject, "//*[@endLine=10]"))
    self.assertTrue(matched)
    self.assertFalse(any(filter(subject, "//*[@endLine=100]")))
def testFilterEndOffset(self):
    """The @endOffset attribute matches the node's end offset."""
    subject = Node()
    subject.end_position.offset = 100
    for query, expected in (("//*[@endOffset=100]", True),
                            ("//*[@endOffset=10]", False)):
        self.assertEqual(any(filter(subject, query)), expected)
def testFilterToken(self):
    """Token values are matchable through the @token attribute."""
    subject = Node()
    subject.token = 'a'
    matched = any(filter(subject, "//*[@token='a']"))
    self.assertTrue(matched)
    self.assertFalse(any(filter(subject, "//*[@token='b']")))
def testFilterEndCol(self):
    """The @endCol attribute matches the node's end column."""
    subject = Node()
    subject.end_position.col = 50
    for query, expected in (("//*[@endCol=50]", True),
                            ("//*[@endCol=5]", False)):
        self.assertEqual(any(filter(subject, query)), expected)
def testFilterBool(self):
    """filter_bool evaluates a boolean() XPath expression on the node."""
    subject = Node()
    has_offsets = filter_bool(
        subject, "boolean(//*[@startOffset or @endOffset])")
    self.assertTrue(has_offsets)
    self.assertFalse(filter_bool(subject, "boolean(//*[@blah])"))
def testFilterBadQuery(self):
    """A syntactically invalid XPath query raises RuntimeError."""
    with self.assertRaises(RuntimeError):
        filter(Node(), "//*roleModule")
def testFilterString(self):
    """filter_string returns the string result of an XPath expression."""
    subject = Node()
    subject.internal_type = "test"
    name = filter_string(subject, "name(//*[1])")
    self.assertEqual(name, "test")
def testFilterNumber(self):
    """filter_number counts the root node plus its three children."""
    parent = Node()
    parent.children.extend(Node() for _ in range(3))
    total = filter_number(parent, "count(//*)")
    self.assertEqual(int(total), 4)
def testFilterBadType(self):
    """Evaluating a boolean() expression through filter raises RuntimeError."""
    subject = Node()
    subject.end_position.col = 50
    with self.assertRaises(RuntimeError):
        filter(subject, "boolean(//*[@startPosition or @endPosition])")
def testFilterInternalType(self):
    """Nodes are selectable by their internal type name."""
    subject = Node()
    subject.internal_type = 'a'
    for query, expected in (("//a", True), ("//b", False)):
        self.assertEqual(any(filter(subject, query)), expected)
def _itTestTree(self):
    """Build the two-level fixture tree (with start offsets) for iteration tests."""
    def offset_node(internal_type, offset):
        # Factory for a typed node with only its start offset populated.
        node = Node()
        node.internal_type = internal_type
        node.start_position.offset = offset
        return node

    root = offset_node('root', 0)
    root.start_position.line = 0
    root.start_position.col = 1
    first = offset_node('son1', 1)
    first.children.extend([offset_node('son1_1', 10),
                           offset_node('son1_2', 10)])
    second = offset_node('son2', 100)
    second.children.extend([offset_node('son2_1', 5),
                            offset_node('son2_2', 15)])
    root.children.extend([first, second])
    return root
def _itTestTree(self):
    """Build the two-level fixture tree (types only, no positions)."""
    def typed_node(internal_type):
        # Factory for a node carrying only an internal type.
        node = Node()
        node.internal_type = internal_type
        return node

    root = typed_node('root')
    first = typed_node('son1')
    first.children.extend([typed_node('son1_1'), typed_node('son1_2')])
    second = typed_node('son2')
    second.children.extend([typed_node('son2_1'), typed_node('son2_2')])
    root.children.extend([first, second])
    return root