Ejemplo n.º 1
0
 def testFilterProperties(self):
     """Check that property predicates match the key/value pairs set on the node."""
     subject = Node()
     subject.properties['k1'] = 'v2'
     subject.properties['k2'] = 'v1'

     k2_found = any(filter(subject, "//*[@k2='v1']"))
     self.assertTrue(k2_found)
     k1_found = any(filter(subject, "//*[@k1='v2']"))
     self.assertTrue(k1_found)
     # 'k1' exists but holds 'v2', so this predicate is expected to match nothing.
     swapped_found = any(filter(subject, "//*[@k1='v1']"))
     self.assertFalse(swapped_found)
Ejemplo n.º 2
0
 def test_extract_changed_nodes(self):
     """Check that only the node starting on the changed line 10 is extracted."""
     child = Node(start_position=Position(line=5))
     root = Node(start_position=Position(line=10), children=[child])

     changed = extract_changed_nodes(root, [10])

     self.assertEqual(len(changed), 1)
     first = changed[0]
     self.assertEqual(first.start_position.line, 10)
Ejemplo n.º 3
0
 def test_files_by_language(self):
     """Check the per-language grouping produced by files_by_language.

     Returns the grouping so that it can be reused by a caller.
     """
     file_stats = {"js": 2, "ruby": 7, "Python": 5}
     # One File per (language, index) pair, built in the dict's iteration order.
     files = [
         File(language=language, uast=Node(children=[Node()]),
              path=language + str(index))
         for language, n_files in file_stats.items()
         for index in range(n_files)
     ]
     result = files_by_language(files)
     group_sizes = [(lang, len(group)) for lang, group in result.items()]
     self.assertEqual([("python", 5), ("js", 2), ("ruby", 7)], group_sizes)
     return result
Ejemplo n.º 4
0
def fix_regexp_node(node: bblfsh.Node) -> bblfsh.Node:
    """
    Copy the "pattern" property of a node into its token.

    Workaround for https://github.com/bblfsh/javascript-driver/issues/37;
    to be dropped once the issue is closed and the fixed driver is in use.

    :param node: node to patch in place.
    :return: the same node object, with its token overwritten.
    """
    pattern = node.properties["pattern"]
    node.token = pattern
    return node
Ejemplo n.º 5
0
def fix_string_literal_type_anotation(node: bblfsh.Node) -> bblfsh.Node:
    """
    Fill an empty token from the node's "value" property.

    Workaround for https://github.com/bblfsh/javascript-driver/issues/66;
    to be dropped once the issue is closed and the fixed driver is in use.

    :param node: node to patch in place.
    :return: the same node object; its token is changed only if it was empty.
    """
    if node.token != "":
        # A token is already present: nothing to repair.
        return node
    node.token = node.properties["value"]
    return node
Ejemplo n.º 6
0
def fix_operator_node(node: bblfsh.Node) -> bblfsh.Node:
    """
    Clear the token of a node whose position fields add up to zero.

    Workaround for https://github.com/bblfsh/javascript-driver/issues/65;
    to be dropped once the issue is closed and the fixed driver is in use.

    :param node: node to patch in place.
    :return: the same node object; its token is emptied when the offset, col \
             and line of both its start and end positions sum to 0.
    """
    position_total = sum(
        getattr(position, field)
        for position in (node.start_position, node.end_position)
        for field in ("offset", "col", "line")
    )
    if position_total == 0:
        node.token = ""
    return node
Ejemplo n.º 7
0
def main():
    """
    Load Python UASTs from siva repositories and report statistics on them.

    Command-line arguments:
      -d / --data    path of the siva data handed to the engine (required).
      -o / --output  parsed for command-line compatibility; this function
                     does not read it.

    The UASTs of the files classified as Python are collected, decoded into
    nodes, and passed to `process_uasts`, `print_statistics` and
    `cluster_nodes`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--data',
                        type=str,
                        help="Path of the data.",
                        required=True)
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        help="Path output to save the data.",
                        required=False)
    args = parser.parse_args()

    spark = SparkSession.builder \
        .master("local[*]").appName("Examples") \
        .getOrCreate()

    # Build the engine once, from the path given on the command line. An
    # earlier engine built on a hard-coded developer path was overwritten
    # before use and has been removed.
    engine = Engine(spark, args.data, "siva")
    # NOTE(review): the halving of the repository count is kept as found;
    # the reason for it is not visible from this function -- confirm.
    print("%d repositories successfully loaded" %
          (engine.repositories.count() / 2))

    binary_uasts = engine.repositories.references.head_ref.commits.tree_entries.blobs \
        .classify_languages().where('lang = "Python"') \
        .extract_uasts().select('path', 'uast').rdd.filter(lambda r: len(r['uast']) > 0).collect()

    # Only the first UAST of each row is decoded.
    uasts = [Node.FromString(row["uast"][0]) for row in binary_uasts]

    # Drop the serialized rows before the heavier processing steps run.
    del binary_uasts

    rules_count, nodes_count = process_uasts(uasts)

    print_statistics(rules_count, nodes_count)

    cluster_nodes(nodes_count)
Ejemplo n.º 8
0
def test_is_relevant_node():
    """Check is_relevant_node for a one-line node and then for a node spanning lines 2-4."""
    node = Node()
    node.start_position.line = 1
    node.end_position.line = 1

    for touching in ({1}, {1, 2}, {1, 3}):
        assert is_relevant_node(node, lines=touching)
    for disjoint in ({2}, {2, 3}):
        assert not is_relevant_node(node, lines=disjoint)

    # Reuse the same node, now stretched over lines 2 to 4.
    node.start_position.line = 2
    node.end_position.line = 4
    for touching in ({2}, {4}, {3}):
        assert is_relevant_node(node, lines=touching)
    assert not is_relevant_node(node, lines={1, 5})
Ejemplo n.º 9
0
    def convert_uast(self, uast: bblfsh.Node) -> bblfsh.Node:
        """
        Build a copy of the UAST with byte positions turned into unicode positions.

        The UAST is expected to correspond to the content held by this object.
        :param uast: UAST matching the content; it is not modified.
        :return: a copy of the UAST with unicode positions. The copy is returned \
                 unchanged when there is no content.
        """
        # A serialization round trip yields a deep copy of the whole tree.
        converted = bblfsh.Node.FromString(uast.SerializeToString())
        if self._content:
            for node in self._traverse_uast(converted):
                for position in (node.start_position, node.end_position):
                    all_zero = (position.offset == 0 and position.col == 0
                                and position.line == 0)
                    if all_zero:
                        # An all-zero position is left as it is.
                        continue
                    updated = self._convert_position(position)
                    position.offset = updated.offset
                    position.line = updated.line
                    position.col = updated.col
        return converted
Ejemplo n.º 10
0
 def testFilterStartCol(self):
     """Check that @startCol matches the start column set on the node."""
     subject = Node()
     subject.start_position.col = 50

     exact = any(filter(subject, "//*[@startCol=50]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@startCol=5]"))
     self.assertFalse(different)
Ejemplo n.º 11
0
def test_extract_subtrees_all_positions():
    """Check extract_paths and extract_subtrees on a small tree under several depth/size limits."""

    def make_node(internal_type, first_line, last_line):
        # Build a node carrying a type and its start/end lines.
        built = Node()
        built.internal_type = internal_type
        built.start_position.line = first_line
        built.end_position.line = last_line
        return built

    def collect(tree, max_depth, max_size, line):
        # Gather every subtree yielded for a single line of interest.
        return list(extract_subtrees(tree,
                                     min_depth=1,
                                     max_depth=max_depth,
                                     min_size=1,
                                     max_size=max_size,
                                     lines={line}))

    root = make_node('root', 1, 4)
    child1 = make_node('1', 1, 1)
    child2 = make_node('2', 2, 3)
    child2a = make_node('2a', 2, 2)
    child2b = make_node('2b', 3, 3)
    child3 = make_node('3', 4, 4)

    # Children are attached bottom-up: child2 gets its own children before it
    # is added under root.
    child2.children.extend([child2a, child2b])
    root.children.extend([child1, child2, child3])

    paths = list(extract_paths(root, lines={1}))
    assert 4 == len(paths)

    assert [child2b] == collect(root, max_depth=1, max_size=100, line=3)

    assert [child3] == collect(root, max_depth=1, max_size=100, line=4)

    found = collect(root, max_depth=2, max_size=100, line=3)
    assert 2 == len(found)
    assert child2b in found
    assert child2 in found

    found = collect(root, max_depth=100, max_size=3, line=3)
    assert 2 == len(found)
    assert child2b in found
    assert child2 in found

    found = collect(root, max_depth=100, max_size=2, line=3)
    assert 1 == len(found)
    assert child2b in found

    # Bury the tree under three extra empty levels and repeat the depth-2 query.
    deeper_root1 = Node()
    deeper_root2 = Node()
    deeper_root3 = Node()
    deeper_root3.children.extend([root])
    deeper_root2.children.extend([deeper_root3])
    deeper_root1.children.extend([deeper_root2])
    found = collect(deeper_root1, max_depth=2, max_size=100, line=3)
    assert 2 == len(found)
    assert child2b in found
    assert child2 in found
Ejemplo n.º 12
0
 def testFilterStartOffset(self):
     """Check that @startOffset matches the start offset set on the node."""
     subject = Node()
     subject.start_position.offset = 100

     exact = any(filter(subject, "//*[@startOffset=100]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@startOffset=10]"))
     self.assertFalse(different)
Ejemplo n.º 13
0
 def testFilterStartLine(self):
     """Check that @startLine matches the start line set on the node."""
     subject = Node()
     subject.start_position.line = 10

     exact = any(filter(subject, "//*[@startLine=10]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@startLine=100]"))
     self.assertFalse(different)
Ejemplo n.º 14
0
 def testFilterRoles(self):
     """Check that a node given role 1 matches @roleIdentifier but not @roleQualified."""
     subject = Node()
     subject.roles.append(1)

     has_identifier = any(filter(subject, "//*[@roleIdentifier]"))
     self.assertTrue(has_identifier)
     has_qualified = any(filter(subject, "//*[@roleQualified]"))
     self.assertFalse(has_qualified)
Ejemplo n.º 15
0
 def testFilterEndLine(self):
     """Check that @endLine matches the end line set on the node."""
     subject = Node()
     subject.end_position.line = 10

     exact = any(filter(subject, "//*[@endLine=10]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@endLine=100]"))
     self.assertFalse(different)
Ejemplo n.º 16
0
 def testFilterEndOffset(self):
     """Check that @endOffset matches the end offset set on the node."""
     subject = Node()
     subject.end_position.offset = 100

     exact = any(filter(subject, "//*[@endOffset=100]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@endOffset=10]"))
     self.assertFalse(different)
Ejemplo n.º 17
0
 def testFilterToken(self):
     """Check that @token matches the token set on the node."""
     subject = Node()
     subject.token = 'a'

     exact = any(filter(subject, "//*[@token='a']"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@token='b']"))
     self.assertFalse(different)
Ejemplo n.º 18
0
 def testFilterEndCol(self):
     """Check that @endCol matches the end column set on the node."""
     subject = Node()
     subject.end_position.col = 50

     exact = any(filter(subject, "//*[@endCol=50]"))
     self.assertTrue(exact)
     different = any(filter(subject, "//*[@endCol=5]"))
     self.assertFalse(different)
Ejemplo n.º 19
0
 def testFilterBool(self):
     """Check filter_bool with an offset predicate and with an unknown attribute."""
     subject = Node()

     has_offsets = filter_bool(
         subject, "boolean(//*[@startOffset or @endOffset])")
     self.assertTrue(has_offsets)
     has_blah = filter_bool(subject, "boolean(//*[@blah])")
     self.assertFalse(has_blah)
Ejemplo n.º 20
0
 def testFilterBadQuery(self):
     """Check that a malformed query makes filter raise RuntimeError."""
     subject = Node()
     with self.assertRaises(RuntimeError):
         filter(subject, "//*roleModule")
Ejemplo n.º 21
0
 def testFilterString(self):
     """Check that filter_string returns the internal type for a name() query."""
     subject = Node()
     subject.internal_type = "test"

     name = filter_string(subject, "name(//*[1])")
     self.assertEqual(name, "test")
Ejemplo n.º 22
0
 def testFilterNumber(self):
     """Check that count(//*) reports the root plus its three children."""
     subject = Node()
     offspring = [Node() for _ in range(3)]
     subject.children.extend(offspring)

     total = int(filter_number(subject, "count(//*)"))
     self.assertEqual(total, 4)
Ejemplo n.º 23
0
 def testFilterBadType(self):
     """Check that filter raises RuntimeError when given a boolean() query."""
     subject = Node()
     subject.end_position.col = 50
     with self.assertRaises(RuntimeError):
         filter(subject, "boolean(//*[@startPosition or @endPosition])")
Ejemplo n.º 24
0
 def testFilterInternalType(self):
     """Check that an element query matches the node's internal type."""
     subject = Node()
     subject.internal_type = 'a'

     matching = any(filter(subject, "//a"))
     self.assertTrue(matching)
     other = any(filter(subject, "//b"))
     self.assertFalse(other)
Ejemplo n.º 25
0
    def _itTestTree(self):
        """Build the fixture tree: a root with two sons, each holding two leaves, all with start offsets."""

        def positioned(internal_type, offset):
            # Node carrying an internal type and a start offset.
            made = Node()
            made.internal_type = internal_type
            made.start_position.offset = offset
            return made

        root = positioned('root', 0)
        root.start_position.line = 0
        root.start_position.col = 1

        son1 = positioned('son1', 1)
        son1.children.extend([positioned('son1_1', 10),
                              positioned('son1_2', 10)])

        son2 = positioned('son2', 100)
        son2.children.extend([positioned('son2_1', 5),
                              positioned('son2_2', 15)])

        # Sons are attached only after their own children are in place.
        root.children.extend([son1, son2])

        return root
Ejemplo n.º 26
0
    def _itTestTree(self):
        """Build the fixture tree: a root with two sons, each holding two leaves, identified by internal type."""

        def typed(internal_type):
            # Node carrying only an internal type.
            made = Node()
            made.internal_type = internal_type
            return made

        root = typed('root')

        son1 = typed('son1')
        son1.children.extend([typed('son1_1'), typed('son1_2')])

        son2 = typed('son2')
        son2.children.extend([typed('son2_1'), typed('son2_2')])

        # Sons are attached only after their own children are in place.
        root.children.extend([son1, son2])

        return root