Beispiel #1
0
def prepare_files(filenames: Iterable[str], client: BblfshClient,
                  language: str) -> Iterable[File]:
    """
    Prepare the given folder for analysis by extracting UASTs and creating the gRPC wrappers.

    :param filenames: List of paths to files to analyze.
    :param client: Babelfish client. Babelfish server should be started accordingly.
    :param language: Language to consider. Will discard the other languages.
    :return: Iterator of File-s with content, uast, path and language set.
    """
    prepared = []
    wanted_language = language.lower()
    for path in tqdm(filter_filepaths(list(filenames))):
        try:
            response = client.parse(path)
        except NonUTF8ContentException:
            # Files that cannot be decoded as UTF-8 are skipped entirely.
            continue
        if response.status != 0:
            continue
        detected_language = response.language.lower()
        if detected_language != wanted_language:
            continue
        with open(path) as handle:
            raw_content = handle.read().encode("utf-8")
        prepared.append(File(content=raw_content,
                             uast=response.uast,
                             path=path,
                             language=detected_language))
    return prepared
Beispiel #2
0
def return_features() -> Response:
    """
    Featurize the given code.

    Expects a JSON request body with "code", "babelfish_address" and "language"
    keys. The code is parsed with Babelfish, featurized with the stored format
    model for the requested language, and a JSON payload describing features,
    predictions, rules and virtual nodes is returned.

    :return: Flask JSON response with the featurization results.
    """
    body = request.get_json()
    code = body["code"]
    babelfish_address = body["babelfish_address"]
    language = body["language"]
    client = BblfshClient(babelfish_address)
    res = client.parse(filename="", contents=code.encode(), language=language)
    if res.status != 0:
        # Babelfish could not parse the submitted code.
        abort(500)
    model = FormatModel().load(str(Path(__file__).parent / "models" / "model.asdf"))
    if language not in model:
        raise NotFittedError()
    rules = model[language]
    # Use the requested language consistently: the model lookup and the feature
    # extractor below both use `language`, so the wrapped file must as well
    # (previously hard-coded to "javascript").
    file = UnicodeFile(content=code, uast=res.uast, language=language, path="path")
    config = rules.origin_config["feature_extractor"]
    config["return_sibling_indices"] = True
    fe = FeatureExtractor(language=language, **config)
    res = fe.extract_features([file])
    if res is None:
        # Feature extraction produced nothing usable for this input.
        abort(500)
    X, y, (vnodes_y, vnodes, vnode_parents, node_parents, sibling_indices) = res
    y_pred, rule_winners, rules, grouped_quote_predictions = rules.predict(
        X=X, vnodes_y=vnodes_y, vnodes=vnodes, feature_extractor=fe)
    # Negative predictions are the model's explicit refusal to predict.
    refuse_to_predict = y_pred < 0
    checker = UASTStabilityChecker(fe)
    _, _, _, _, safe_preds = checker.check(
        y=y, y_pred=y_pred, vnodes_y=vnodes_y, vnodes=vnodes, files=[file], stub=client._stub,
        vnode_parents=vnode_parents, node_parents=node_parents, rule_winners=rule_winners,
        grouped_quote_predictions=grouped_quote_predictions)
    # Mark every prediction that did not pass the UAST stability check.
    break_uast = [False] * X.shape[0]
    for wrong_pred in set(range(X.shape[0])).difference(safe_preds):
        break_uast[wrong_pred] = True
    labeled_indices = {id(vnode): i for i, vnode in enumerate(vnodes_y)}
    app.logger.info("returning features of shape %d, %d" % X.shape)
    app.logger.info("length of rules: %d", len(rules))
    return jsonify({
        "code": code,
        "features": _input_matrix_to_descriptions(X, fe),
        "ground_truths": y.tolist(),
        "predictions": y_pred.tolist(),
        "refuse_to_predict": refuse_to_predict.tolist(),
        "sibling_indices": sibling_indices,
        "rules": _rules_to_jsonable(rules, fe),
        "winners": rule_winners.tolist(),
        "break_uast": break_uast,
        "feature_names": fe.feature_names,
        "class_representations": fe.composite_class_representations,
        "class_printables": fe.composite_class_printables,
        "vnodes": list(map(partial(_vnode_to_jsonable, labeled_indices=labeled_indices), vnodes)),
        "config": _mapping_to_jsonable(rules.origin_config)})
Beispiel #3
0
def prepare_file(filename: str, client: BblfshClient, language: str) -> File:
    """
    Prepare the given file for analysis by extracting UAST and creating the gRPC wrapper.

    :param filename: Path to the filename to analyze.
    :param client: Babelfish client. Babelfish server should be started accordingly.
    :param language: Language to consider. Will discard the other languages
    :return: File with content, uast and path set.
    """
    assert os.path.isfile(filename), "\"%s\" should be a file" % filename
    res = client.parse(filename, language)
    assert res.status == 0, "Parse returned status %s for file %s" % (
        res.status, filename)
    # The first placeholder was "% " (a space-flagged %s conversion swallowing
    # the following "s"), which garbled the assertion message; it must be "%s".
    error_log = "Language for %s should be %s instead of %s"
    assert res.language.lower() == language.lower(), error_log % (
        filename, language, res.language)

    with open(filename) as f:
        content = f.read().encode("utf-8")

    return File(content=content, uast=res.uast, path=filename)
Beispiel #4
0
class BblfshTests(unittest.TestCase):
    BBLFSH_SERVER_EXISTED = None
    fixtures_pyfile = "fixtures/test.py"
    fixtures_cfile = "fixtures/test.c"

    @classmethod
    def setUpClass(cls: t.Any) -> None:
        cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running()

    @classmethod
    def tearDownClass(cls: t.Any) -> None:
        if not cls.BBLFSH_SERVER_EXISTED:
            client = docker.from_env(version="auto")
            client.containers.get("bblfshd").remove(force=True)
            client.api.close()

    def setUp(self) -> None:
        self.client = BblfshClient("localhost:9432")

    def _parse_fixture(self) -> ResultContext:
        ctx = self.client.parse(self.fixtures_pyfile)
        self._validate_ctx(ctx)
        return ctx

    def testVersion(self) -> None:
        version = self.client.version()
        self.assertTrue(hasattr(version, "version"))
        self.assertTrue(version.version)
        self.assertTrue(hasattr(version, "build"))
        self.assertTrue(version.build)

    def testNativeParse(self) -> None:
        ctx = self.client.parse(self.fixtures_pyfile, mode=Modes.NATIVE)
        self._validate_ctx(ctx)
        self.assertIsNotNone(ctx)

        it = ctx.filter("//*[@ast_type='NoopLine']")
        self.assertIsNotNone(it)
        self.assertIsInstance(it, NodeIterator)
        res = list(it)
        self.assertGreater(len(res), 1)
        for i in res:
            t = i.get_dict().get("ast_type")
            self.assertIsNotNone(t)
            self.assertEqual(t, "NoopLine")

    def testNonUTF8ParseError(self) -> None:
        self.assertRaises(NonUTF8ContentException, self.client.parse, "",
                          "Python", b"a = '\x80abc'")

    def testUASTDefaultLanguage(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(ctx.language, "python")

    def testUASTWithLanguage(self) -> None:
        ctx = self.client.parse(self.fixtures_pyfile, language="Python")
        self._validate_ctx(ctx)
        self.assertEqual(ctx.language, "python")

    def testUASTWithLanguageAlias(self) -> None:
        ctx = self.client.parse(self.fixtures_cfile)
        self._validate_ctx(ctx)
        self.assertEqual(ctx.language, "c")

        it = ctx.filter(
            "//uast:FunctionGroup/Nodes/uast:Alias/Name/uast:Identifier/Name")
        self.assertIsInstance(it, NodeIterator)

        self.assertEqual(next(it).get(), "main")
        self.assertEqual(next(it).get(), "fib")

    def testUASTFileContents(self) -> None:
        with open(self.fixtures_pyfile, "r") as fin:
            contents = fin.read()

        ctx = self.client.parse("file.py", contents=contents)
        self._validate_ctx(ctx)

        def assert_strnode(n: Node, expected: str) -> None:
            self.assertEqual(n.get(), expected)
            self.assertIsInstance(n.get_str(), str)
            self.assertEqual(n.get_str(), expected)

        it = ctx.filter("//uast:RuntimeImport/Path/uast:Identifier/Name")
        self.assertIsInstance(it, NodeIterator)

        assert_strnode(next(it), "os")
        assert_strnode(next(it), "resource")
        assert_strnode(next(it), "unittest")
        assert_strnode(next(it), "docker")
        assert_strnode(next(it), "bblfsh")
        assert_strnode(next(it), "bblfsh")
        self.assertRaises(StopIteration, next, it)

    def testBrokenFilter(self) -> None:
        ctx = self._parse_fixture()

        self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$")

    def testFilterToken(self):
        ctx = self._parse_fixture()
        it = ctx.filter("//*[@token='else']/text()")
        first = next(it).get_str()
        self.assertEqual(first, "else")

    def testFilterRoles(self) -> None:
        ctx = self._parse_fixture()
        it = ctx.filter("//*[@role='Identifier']")
        self.assertIsInstance(it, NodeIterator)

        l = list(it)
        self.assertGreater(len(l), 0)

        it = ctx.filter("//*[@role='Friend']")
        self.assertIsInstance(it, NodeIterator)
        l = list(it)
        self.assertEqual(len(l), 0)

    def testFilterProperties(self) -> None:
        ctx = uast()
        obj = {"k1": "v1", "k2": "v2"}
        self.assertTrue(any(ctx.filter("/*[@k1='v1']", obj)))
        self.assertTrue(any(ctx.filter("/*[@k2='v2']", obj)))
        self.assertFalse(any(ctx.filter("/*[@k2='v1']", obj)))
        self.assertFalse(any(ctx.filter("/*[@k1='v2']", obj)))

    def testFilterStartOffset(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(
                ctx.filter(
                    "//uast:Positions/start/uast:Position[@offset=11749]")))
        self.assertFalse(
            any(
                ctx.filter(
                    "//uast:Positions/start/uast:Position[@offset=99999]")))

    def testFilterStartLine(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(ctx.filter("//uast:Positions/start/uast:Position[@line=295]")))
        self.assertFalse(
            any(ctx.filter(
                "//uast:Positions/start/uast:Position[@line=99999]")))

    def testFilterStartCol(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(ctx.filter("//uast:Positions/start/uast:Position[@col=42]")))
        self.assertFalse(
            any(ctx.filter(
                "//uast:Positions/start/uast:Position[@col=99999]")))

    def testFilterEndOffset(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(ctx.filter(
                "//uast:Positions/end/uast:Position[@offset=11757]")))
        self.assertFalse(
            any(ctx.filter(
                "//uast:Positions/end/uast:Position[@offset=99999]")))

    def testFilterEndLine(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(ctx.filter("//uast:Positions/end/uast:Position[@line=321]")))
        self.assertFalse(
            any(ctx.filter("//uast:Positions/end/uast:Position[@line=99999]")))

    def testFilterEndCol(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            any(ctx.filter("//uast:Positions/end/uast:Position[@col=49]")))
        self.assertFalse(
            any(ctx.filter("//uast:Positions/end/uast:Position[@col=99999]")))

    def testFilterBool(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(
            ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])"))
        self.assertTrue(
            next(
                ctx.filter(
                    "boolean(//uast:Positions/end/uast:Position[@col=49])")).
            get())
        self.assertTrue(
            next(
                ctx.filter(
                    "boolean(//uast:Positions/end/uast:Position[@col=49])")).
            get_bool())

        self.assertFalse(
            next(
                ctx.filter(
                    "boolean(//uast:Positions/end/uast:Position[@col=9999])")).
            get())
        self.assertFalse(
            next(
                ctx.filter(
                    "boolean(//uast:Positions/end/uast:Position[@col=9999])")).
            get_bool())

    def testFilterNumber(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(
            next(
                ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])"
                           )).get(), 2)
        self.assertEqual(
            next(
                ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])"
                           )).get_int(), 2)
        self.assertEqual(
            next(
                ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])"
                           )).get_float(), 2.0)

    def testFilterString(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(
            next(ctx.filter("name(//uast:Positions)")).get(), "uast:Positions")
        self.assertEqual(
            next(ctx.filter("name(//uast:Positions)")).get_str(),
            "uast:Positions")

    def testFilterBadQuery(self) -> None:
        ctx = uast()
        self.assertRaises(RuntimeError, ctx.filter, "//[@roleModule]", {})

    def testFilterBadType(self) -> None:
        ctx = self._parse_fixture()
        res = next(
            ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])"))
        self.assertRaises(NodeTypedGetException, res.get_str)

    def testRoleIdName(self) -> None:
        self.assertEqual(role_id(role_name(1)), 1)
        self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER")

    @staticmethod
    def _itTestTree() -> dict:
        def set_position(node: dict, start_offset: int, start_line: int,
                         start_col: int, end_offset: int, end_line: int,
                         end_col: int) -> None:
            node["@pos"] = {
                "@type": "uast:Positions",
                "start": {
                    "@type": "uast:Position",
                    "offset": start_offset,
                    "line": start_line,
                    "col": start_col
                },
                "end": {
                    "@type": "uast:Position",
                    "offset": end_offset,
                    "line": end_line,
                    "col": end_col
                }
            }

        root = {"@type": "root"}
        set_position(root, 0, 1, 1, 1, 1, 2)

        son1 = {"@type": "son1"}
        set_position(son1, 2, 2, 2, 3, 2, 3)

        son1_1 = {"@type": "son1_1"}
        set_position(son1_1, 10, 10, 1, 12, 2, 2)

        son1_2 = {"@type": "son1_2"}
        set_position(son1_2, 10, 10, 1, 12, 2, 2)

        son1["children"] = [son1_1, son1_2]

        son2 = {"@type": "son2"}
        set_position(son2, 100, 100, 1, 101, 100, 2)

        son2_1 = {"@type": "son2_1"}
        set_position(son2_1, 5, 5, 1, 6, 5, 2)

        son2_2 = {"@type": "son2_2"}
        set_position(son2_2, 15, 15, 1, 16, 15, 2)

        son2["children"] = [son2_1, son2_2]
        root["children"] = [son1, son2]

        return root

    @staticmethod
    def _get_nodetypes(iterator: NodeIterator) -> t.List[str]:
        return [
            n["@type"] for n in filter(lambda x: isinstance(x, dict), iterator)
        ]

    def testIteratorPreOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.PRE_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son1_1', 'son1_2', 'son2', 'son2_1', 'son2_2'])

    def testIteratorPostOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POST_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(
            expanded,
            ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root'])

    def testIteratorLevelOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.LEVEL_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son2', 'son1_1', 'son1_2', 'son2_1', 'son2_2'])

    def testIteratorPositionOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POSITION_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son2_1', 'son1_1', 'son1_2', 'son2_2', 'son2'])

    def _validate_ctx(self, ctx: ResultContext) -> None:
        self.assertIsNotNone(ctx)
        self.assertIsInstance(ctx, ResultContext)
        self.assertIsInstance(ctx.uast, Node)

    def testFilterInsideIter(self) -> None:
        ctx = self._parse_fixture()
        c2 = uast()
        for n in ctx.iterate(TreeOrder.PRE_ORDER):
            c2.filter("//uast:Positions", n)

    def testItersMixingIterations(self) -> None:
        ctx = self._parse_fixture()

        it = ctx.iterate(TreeOrder.PRE_ORDER)
        next(it)
        next(it)
        next(it)
        next(it)

        it2 = it.iterate(TreeOrder.PRE_ORDER)
        next(it2)

        a = next(it).get()
        b = next(it2).get()
        self.assertEqual(a, b)

    def testManyFilters(self) -> None:
        ctx = self._parse_fixture()

        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(10000):
            ctx.filter("//*[@role='Identifier']")

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testManyParses(self) -> None:
        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(100):
            self.client.parse(self.fixtures_pyfile)

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testManyParsesAndFilters(self) -> None:
        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(100):
            ctx = self.client.parse(self.fixtures_pyfile)
            ctx.filter("//*[@role='Identifier']")

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testSupportedLanguages(self) -> None:
        res = self.client.supported_languages()
        self.assertGreater(len(res), 0)
        for l in res:
            for key in ('language', 'version', 'status', 'features'):
                self.assertTrue(hasattr(l, key))
                self.assertIsNotNone(getattr(l, key))

    def testEncode(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(ctx.ctx.encode(None, 0), ctx._response.uast)

    def testEncodeWithEmptyContext(self) -> None:
        ctx = ResultContext()
        obj = {"k1": "v1", "k2": "v2"}
        fmt = 1  # YAML

        data = ctx.ctx.encode(obj, fmt)
        self.assertDictEqual(obj, decode(data, format=fmt).load())

    def testGetAll(self) -> None:
        ctx = self._parse_fixture()

        expected = ["os", "resource", "unittest", "docker", "bblfsh"]
        actual = []
        for k in ctx.get_all()["body"]:
            if "@type" in k and k[
                    "@type"] == "uast:RuntimeImport" and "Path" in k:
                path = k["Path"]
                if "Name" in path:
                    actual.append(k["Path"]["Name"])

        self.assertListEqual(expected, actual)

    def testLoad(self) -> None:
        ctx = self._parse_fixture()

        it = ctx.iterate(TreeOrder.PRE_ORDER)
        next(it)
        next(it)
        next(it)
        next(it)

        it2 = it.iterate(TreeOrder.PRE_ORDER)
        n = next(it2)
        node_ext = n.node_ext

        obj = node_ext.load()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

        path = obj["Path"]
        self.assertEqual("uast:Identifier", path["@type"])
        self.assertEqual("os", path["Name"])
Beispiel #5
0
class Parser:
    """Parse files into list of nodes."""

    # Class-level configuration; populated per concrete subclass by __init_subclass__.
    _bblfsh_language: str
    _parser_reserved: Pattern
    _parser_space: Pattern
    _uast_fixers: Optional[Dict[str, Callable[[BblfshNode], None]]]
    _convert_to_utf8: bool
    _logger: Logger

    def __init_subclass__(
        cls,
        bblfsh_language: str,
        reserved: List[str],
        uast_fixers: Optional[Dict[str, Callable[[BblfshNode], None]]] = None,
        convert_to_utf8: bool = True,
    ) -> None:
        """
        Configure a concrete parser subclass.

        :param bblfsh_language: Language name requested from Babelfish.
        :param reserved: Reserved tokens of the language, matched in the gaps \
                         between UAST-backed tokens.
        :param uast_fixers: Mapping from internal node type to a fixer callable \
                            applied while walking the UAST.
        :param convert_to_utf8: Whether node contents should be converted to UTF-8.
        """
        cls._bblfsh_language = bblfsh_language
        # Reverse sort so that tokens sharing a prefix put the longer one first
        # in the alternation, letting it win the regex match.
        cls._parser_reserved = re_compile(
            "|".join(re_escape(i) for i in sorted(reserved, reverse=True))
        )
        cls._parser_space = re_compile(r"\s+")
        cls._uast_fixers = uast_fixers if uast_fixers else {}
        cls._convert_to_utf8 = convert_to_utf8
        cls._logger = getLogger(cls.__name__)

    def __init__(
        self,
        bblfshd_endpoint: str = environ.get("BBLFSHD_ENDPOINT", "0.0.0.0:9432"),
        split_formatting: bool = False,
    ) -> None:
        """Construct a parser."""
        # NOTE(review): the default endpoint is read from the environment at class
        # definition time, not per call — confirm this is the intended behavior.
        for attr in [
            "_bblfsh_language",
            "_parser_reserved",
            "_parser_space",
            "_uast_fixers",
        ]:
            if not hasattr(self, attr):

                raise NotImplementedError(
                    f"The {self.__class__.__name__} is a base class and should not be "
                    "used directly."
                )
        self._bblfsh_client = BblfshClient(bblfshd_endpoint)
        self._split_formatting = split_formatting

    @property
    def split_formatting(self) -> bool:
        """Whether formatting nodes are split into one node per character."""
        return self._split_formatting

    def parse(self, repository_path: Path, file_path: Path) -> Nodes:
        """
        Parse a file into a list of `Node`s.

        :param repository_path: Path of the folder that contains the file to parse.
        :param file_path: Path of the file to parse.
        :return: List of parsed `Node`s.
        :raise ParsingException: If Babelfish could not process the file or some \
                                 tokens in the source could not be accounted for.
        """
        response = self._bblfsh_client.parse(
            str(repository_path / file_path), language=self._bblfsh_language
        )
        if response.status != 0:
            # Logger.warn is a deprecated alias of Logger.warning.
            self._logger.warning(
                "Could not process file %s, errors: %s",
                file_path,
                "; ".join(response.errors),
            )
            raise ParsingException(
                f"Could not process file {file_path}, "
                f"errors: {'; '.join(response.errors)}"
            )
        file_content = (repository_path / file_path).read_text(
            encoding="utf-8", errors="replace"
        )
        bblfsh_node_converter = BblfshNodeConverter(
            file_content, convert_to_utf8=self._convert_to_utf8
        )
        root_node = bblfsh_node_converter.bblfsh_node_to_node(response.uast, None)
        # Depth-first walk over the UAST, converting each bblfsh node and
        # collecting leaf tokens that carry positions.
        to_visit = [(response.uast, root_node)]
        non_formatting_tokens = []
        while to_visit:
            current_bblfsh_node, current_node = to_visit.pop()
            if current_bblfsh_node.internal_type in self._uast_fixers:
                current_bblfsh_node = self._uast_fixers[
                    current_bblfsh_node.internal_type
                ](current_bblfsh_node)
                if current_bblfsh_node is None:
                    # The fixer removed this node from the tree.
                    continue
            to_visit.extend(
                (
                    bblfsh_child,
                    bblfsh_node_converter.bblfsh_node_to_node(
                        bblfsh_child, current_node
                    ),
                )
                for bblfsh_child in current_bblfsh_node.children
            )
            if (
                current_node.token
                and not current_bblfsh_node.children
                and (current_node.start is not None and current_node.end is not None)
            ):
                non_formatting_tokens.append(current_node)
        # Sentinel at EOF guarantees the gap-filling loop below also covers the
        # region after the last real token.
        sentinel = Node(
            token=None,
            internal_type="Sentinel",
            roles=[],
            parent=None,
            start=len(file_content),
            end=len(file_content),
        )
        non_formatting_tokens.append(sentinel)

        pos = 0
        tokens = []
        for node in sorted(non_formatting_tokens, key=lambda n: n.start):
            if node.start < pos:
                # Overlapping token already covered by a previous one.
                continue
            if node.start > pos:
                # Fill the gap between tokens with reserved tokens and whitespace.
                sumlen = 0
                diff = file_content[pos : node.start]
                additional_nodes = []
                for match in self._parser_reserved.finditer(diff):
                    token = match.group()
                    additional_nodes.append(
                        Node(
                            start=match.start() + pos,
                            end=match.end() + pos,
                            token=token,
                            parent=None,
                            internal_type=token.title(),
                            roles=[match.group().upper()],
                        )
                    )
                    sumlen += len(token)
                for match in self._parser_space.finditer(diff):
                    token = match.group()
                    assert token.isspace()
                    additional_nodes.append(
                        Node(
                            start=match.start() + pos,
                            end=match.end() + pos,
                            token=token,
                            parent=None,
                            internal_type=FORMATTING_INTERNAL_TYPE,
                            roles=[FORMATTING_ROLE],
                        )
                    )
                    sumlen += len(token)
                if sumlen != node.start - pos:
                    # Some characters in the gap matched neither a reserved token
                    # nor whitespace: the reconstruction would be lossy.
                    self._logger.warning("missed some imaginary tokens: %s", diff)
                    raise ParsingException(f"missed some imaginary tokens: {diff}")
                tokens.extend(sorted(additional_nodes, key=lambda n: n.start))
            if node is sentinel:
                break
            tokens.append(node)
            pos = node.end

        tokens = self._augment_tokens(tokens)

        # Assign a parent to every gap-filling node: the LCA of its nearest
        # parented neighbors, falling back to the root.
        closest_left_node = None
        for i, token_node in enumerate(tokens):
            if token_node.parent is not None:
                closest_left_node = token_node
            else:
                found_parent = self._find_parent(i, tokens, closest_left_node)
                token_node.parent = (
                    found_parent if found_parent is not None else root_node
                )

        if self._split_formatting:
            tokens = self._perform_split_formatting(tokens)

        reconstructed_file_content = "".join(node.token for node in tokens)

        if file_content != reconstructed_file_content:
            # Sanity check: the token stream should reproduce the file verbatim.
            diff = "".join(
                unified_diff(
                    file_content.splitlines(keepends=True),
                    reconstructed_file_content.splitlines(keepends=True),
                    fromfile="original",
                    tofile="reconstructed",
                )
            )
            self._logger.warning(
                "reconstructed file is not equal to original:\n%s", diff)
        return Nodes.from_token_nodes(tokens)

    def _augment_tokens(self, tokens: List[Node]) -> List[Node]:
        """
        Interleave empty formatting nodes so formatting and non-formatting alternate.

        An empty formatting node is inserted before the first token, between any
        two adjacent non-formatting tokens, and after the last token as needed.

        :param tokens: Sequence of token `Node`-s sorted by position.
        :return: The augmented sequence of token `Node`-s.
        """
        augmented_tokens = []

        if not tokens or tokens[0].internal_type != FORMATTING_INTERNAL_TYPE:
            augmented_tokens.append(
                Node(
                    start=0,
                    end=0,
                    token="",
                    parent=None,
                    internal_type=FORMATTING_INTERNAL_TYPE,
                    roles=[FORMATTING_ROLE],
                )
            )
        if tokens:
            augmented_tokens.append(tokens[0])

        for previous_token, next_token in zip(
            islice(tokens, 0, None), islice(tokens, 1, None)
        ):
            assert previous_token.end == next_token.start
            if (
                previous_token.internal_type != FORMATTING_INTERNAL_TYPE
                and next_token.internal_type != FORMATTING_INTERNAL_TYPE
            ):
                augmented_tokens.append(
                    Node(
                        start=previous_token.end,
                        end=previous_token.end,
                        token="",
                        parent=None,
                        internal_type=FORMATTING_INTERNAL_TYPE,
                        roles=[FORMATTING_ROLE],
                    )
                )
            augmented_tokens.append(next_token)

        if tokens and tokens[-1].internal_type != FORMATTING_INTERNAL_TYPE:
            augmented_tokens.append(
                Node(
                    start=tokens[-1].end,
                    end=tokens[-1].end,
                    token="",
                    parent=None,
                    internal_type=FORMATTING_INTERNAL_TYPE,
                    roles=[FORMATTING_ROLE],
                )
            )
        return augmented_tokens

    @staticmethod
    def _find_parent(
        node_index: int, nodes: List[Node], closest_left_node: Optional[Node]
    ) -> Optional[Node]:
        """
        Compute a node's parent as the LCA of the closest left and right nodes.

        :param node_index: Index of the node for which to find a parent.
        :param nodes: Sequence of token `Node`-s.
        :param closest_left_node: Closest node on the left with a true parent.
        :return: The Node of the found parent or None if no parent was found.
        """
        if closest_left_node is None:
            return None
        left_ancestor_ids = set()
        current_left_ancestor = closest_left_node.parent
        while current_left_ancestor is not None:
            left_ancestor_ids.add(id(current_left_ancestor))
            current_left_ancestor = current_left_ancestor.parent

        for future_node in nodes[node_index + 1 :]:
            if future_node.parent is not None:
                break
        else:
            # No parented node exists to the right.
            return None
        current_right_ancestor = future_node.parent
        while current_right_ancestor is not None:
            if id(current_right_ancestor) in left_ancestor_ids:
                return current_right_ancestor
            current_right_ancestor = current_right_ancestor.parent
        return None

    def _perform_split_formatting(self, nodes: List[Node]) -> List[Node]:
        """
        Split each formatting node into a list of one node per character.

        :param nodes: Sequence of token `Node`-s.
        :return: The new sequence, with split formatting nodes.
        """
        new_nodes = []
        for node in nodes:
            if node.internal_type == FORMATTING_INTERNAL_TYPE and node.token:
                for i, char in enumerate(node.token):
                    new_nodes.append(
                        Node(
                            token=char,
                            internal_type=node.internal_type,
                            roles=node.roles,
                            parent=node.parent,
                            start=node.start + i,
                            end=node.start + i + 1,
                        )
                    )
            else:
                new_nodes.append(node)
        return new_nodes

    def __del__(self) -> None:
        """Close the gRPC channel when the parser is garbage collected."""
        # getattr guard: if __init__ raised before _bblfsh_client was assigned
        # (e.g. on the base class), attribute access here would itself raise.
        client = getattr(self, "_bblfsh_client", None)
        if client:
            client._channel.close()
            client._channel = client._stub = None
Beispiel #6
0
def parse_files(filepaths: Sequence[str], line_length_limit: int,
                overall_size_limit: int, client: BblfshClient, language: str,
                random_state: int = 7, progress_tracker: Callable = lambda x: x,
                log: Optional[logging.Logger] = None) -> Iterable[File]:
    """
    Parse files with Babelfish.

    If a file has lines longer than `line_length_limit`, it is skipped. If the summed size of \
    parsed files exceeds `overall_size_limit` the rest of the files is skipped. Files paths are \
    filtered with `filter_files_by_path()`. The order in which the files are parsed is random - \
    and hence different from `filepaths`.

    :param filepaths: File paths to filter.
    :param line_length_limit: Maximum line length to accept a file.
    :param overall_size_limit: Maximum cumulative files size in bytes. \
                               The files are discarded after reaching this limit.
    :param client: Babelfish client instance. The Babelfish server should be running.
    :param language: Language to consider. Will discard the other languages.
    :param random_state: Random generator state for shuffling the files.
    :param progress_tracker: Optional progress metric when iterating over the input files.
    :param log: Logger to use to report the number of excluded files.
    :return: `File`-s with parsed UASTs and which passed through the filters.
    """
    def load_file(path):
        # Read raw bytes so the line length filter sees the file exactly as stored.
        with open(path, "rb") as f:
            return f.read()

    # Use a private Random instance so we do not clobber the global RNG state of the caller.
    # Random(seed) yields the same sample() sequence as random.seed(seed) + random.sample().
    rng = random.Random(random_state)
    filepaths_filtered = list(filter_files_by_path(filepaths))
    # Sort before shuffling so the result is reproducible regardless of the input order.
    files_filtered_by_line_length = sorted(
        filter_files_by_line_length(filepaths_filtered, load_file, line_length_limit))
    files_filtered_by_line_length = rng.sample(files_filtered_by_line_length,
                                               k=len(files_filtered_by_line_length))
    size, n_parsed = 0, 0
    size_passed = []
    for filename in progress_tracker(files_filtered_by_line_length):
        try:
            res = client.parse(filename)
        except NonUTF8ContentException:
            # skip files that can't be parsed because of UTF-8 decoding errors.
            continue
        if res.status == 0 and res.language.lower() == language.lower():
            n_parsed += 1
            with open(filename, "rb") as f:
                content = f.read()
            size += len(content)
            if size > overall_size_limit:
                # The size budget is exhausted: discard this file and everything after it.
                break
            uast = res.uast
            path = filename
            size_passed.append(File(content=content, uast=uast, path=path,
                                    language=res.language.lower()))
    if log is not None:
        log.debug("excluded %d/%d files based on their path",
                  len(filepaths) - len(filepaths_filtered), len(filepaths))
        log.debug("excluded %d/%d %s files by max line length %d",
                  len(filepaths_filtered) - len(files_filtered_by_line_length),
                  len(filepaths_filtered), language, line_length_limit)
        log.debug("excluded %d/%d %s files due to parsing problems",
                  len(files_filtered_by_line_length) - n_parsed,
                  len(files_filtered_by_line_length), language)
        log.debug("excluded %d/%d %s files by max overall size %d",
                  n_parsed - len(size_passed), n_parsed, language,
                  overall_size_limit)
    return size_passed
Beispiel #7
0
class BblfshTests(unittest.TestCase):
    """Integration tests for the bblfsh client protobuf Node API against a live bblfshd server."""

    # Set by setUpClass: truthy when a bblfshd server was already running before the suite,
    # so tearDownClass knows whether it owns (and must remove) the container.
    BBLFSH_SERVER_EXISTED = None

    @classmethod
    def setUpClass(cls):
        cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running()

    @classmethod
    def tearDownClass(cls):
        # Remove the bblfshd container only if this test run started it itself.
        if not cls.BBLFSH_SERVER_EXISTED:
            client = docker.from_env(version="auto")
            client.containers.get("bblfshd").remove(force=True)
            client.api.close()

    def setUp(self):
        self.client = BblfshClient("0.0.0.0:9432")

    def testVersion(self):
        version = self.client.version()
        self.assertTrue(hasattr(version, "version"))
        self.assertTrue(version.version)
        self.assertTrue(hasattr(version, "build"))
        self.assertTrue(version.build)

    def testNativeParse(self):
        reply = self.client.native_parse(__file__)
        assert (reply.ast)

    def testNonUTF8ParseError(self):
        # b"\x80" is not valid UTF-8, so the server must reject the contents.
        self.assertRaises(NonUTF8ContentException, self.client.parse, "",
                          "Python", b"a = '\x80abc'")

    def testUASTDefaultLanguage(self):
        self._validate_resp(self.client.parse(__file__))

    def testUASTPython(self):
        self._validate_resp(self.client.parse(__file__, language="Python"))

    def testUASTFileContents(self):
        with open(__file__, "rb") as fin:
            contents = fin.read()
        resp = self.client.parse("file.py", contents=contents)
        self._validate_resp(resp)
        self._validate_filter(resp)

    def testBrokenFilter(self):
        # NOTE(review): `filter` here is the bblfsh XPath filter (shadows the builtin);
        # passing a non-Node first argument is expected to raise — confirm the import list.
        self.assertRaises(RuntimeError, filter, 0, "foo")

    def testFilterInternalType(self):
        node = Node()
        node.internal_type = 'a'
        self.assertTrue(any(filter(node, "//a")))
        self.assertFalse(any(filter(node, "//b")))

    def testFilterToken(self):
        node = Node()
        node.token = 'a'
        self.assertTrue(any(filter(node, "//*[@token='a']")))
        self.assertFalse(any(filter(node, "//*[@token='b']")))

    def testFilterRoles(self):
        node = Node()
        node.roles.append(1)
        self.assertTrue(any(filter(node, "//*[@roleIdentifier]")))
        self.assertFalse(any(filter(node, "//*[@roleQualified]")))

    def testFilterProperties(self):
        node = Node()
        node.properties['k1'] = 'v2'
        node.properties['k2'] = 'v1'
        self.assertTrue(any(filter(node, "//*[@k2='v1']")))
        self.assertTrue(any(filter(node, "//*[@k1='v2']")))
        self.assertFalse(any(filter(node, "//*[@k1='v1']")))

    def testFilterStartOffset(self):
        node = Node()
        node.start_position.offset = 100
        self.assertTrue(any(filter(node, "//*[@startOffset=100]")))
        self.assertFalse(any(filter(node, "//*[@startOffset=10]")))

    def testFilterStartLine(self):
        node = Node()
        node.start_position.line = 10
        self.assertTrue(any(filter(node, "//*[@startLine=10]")))
        self.assertFalse(any(filter(node, "//*[@startLine=100]")))

    def testFilterStartCol(self):
        node = Node()
        node.start_position.col = 50
        self.assertTrue(any(filter(node, "//*[@startCol=50]")))
        self.assertFalse(any(filter(node, "//*[@startCol=5]")))

    def testFilterEndOffset(self):
        node = Node()
        node.end_position.offset = 100
        self.assertTrue(any(filter(node, "//*[@endOffset=100]")))
        self.assertFalse(any(filter(node, "//*[@endOffset=10]")))

    def testFilterEndLine(self):
        node = Node()
        node.end_position.line = 10
        self.assertTrue(any(filter(node, "//*[@endLine=10]")))
        self.assertFalse(any(filter(node, "//*[@endLine=100]")))

    def testFilterEndCol(self):
        node = Node()
        node.end_position.col = 50
        self.assertTrue(any(filter(node, "//*[@endCol=50]")))
        self.assertFalse(any(filter(node, "//*[@endCol=5]")))

    def testFilterBool(self):
        node = Node()
        self.assertTrue(
            filter_bool(node, "boolean(//*[@startOffset or @endOffset])"))
        self.assertFalse(filter_bool(node, "boolean(//*[@blah])"))

    def testFilterNumber(self):
        node = Node()
        node.children.extend([Node(), Node(), Node()])
        # count(//*) includes the root node itself, hence 4 for 3 children.
        self.assertEqual(int(filter_number(node, "count(//*)")), 4)

    def testFilterString(self):
        node = Node()
        node.internal_type = "test"
        self.assertEqual(filter_string(node, "name(//*[1])"), "test")

    def testFilterBadQuery(self):
        node = Node()
        self.assertRaises(RuntimeError, filter, node, "//*roleModule")

    def testFilterBadType(self):
        node = Node()
        node.end_position.col = 50
        # A boolean() query through the plain filter() entry point is a type mismatch.
        self.assertRaises(RuntimeError, filter, node,
                          "boolean(//*[@startPosition or @endPosition])")

    def testRoleIdName(self):
        # role_id and role_name must be inverse mappings of each other.
        self.assertEqual(role_id(role_name(1)), 1)
        self.assertEqual(role_name(role_id("IDENTIFIER")), "IDENTIFIER")

    def _itTestTree(self):
        # Build a small synthetic tree with hand-picked start offsets so that the
        # iterator-order tests below have deterministic, distinguishable outputs.
        root = Node()
        root.internal_type = 'root'
        root.start_position.offset = 0
        root.start_position.line = 0
        root.start_position.col = 1

        son1 = Node()
        son1.internal_type = 'son1'
        son1.start_position.offset = 1

        son1_1 = Node()
        son1_1.internal_type = 'son1_1'
        son1_1.start_position.offset = 10

        son1_2 = Node()
        son1_2.internal_type = 'son1_2'
        son1_2.start_position.offset = 10

        son1.children.extend([son1_1, son1_2])

        son2 = Node()
        son2.internal_type = 'son2'
        son2.start_position.offset = 100

        son2_1 = Node()
        son2_1.internal_type = 'son2_1'
        son2_1.start_position.offset = 5

        son2_2 = Node()
        son2_2.internal_type = 'son2_2'
        son2_2.start_position.offset = 15

        son2.children.extend([son2_1, son2_2])
        root.children.extend([son1, son2])

        return root

    def testIteratorPreOrder(self):
        root = self._itTestTree()
        it = iterator(root, TreeOrder.PRE_ORDER)
        self.assertIsNotNone(it)
        expanded = [node.internal_type for node in it]
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son1_1', 'son1_2', 'son2', 'son2_1', 'son2_2'])

    def testIteratorPostOrder(self):
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POST_ORDER)
        self.assertIsNotNone(it)
        expanded = [node.internal_type for node in it]
        self.assertListEqual(
            expanded,
            ['son1_1', 'son1_2', 'son1', 'son2_1', 'son2_2', 'son2', 'root'])

    def testIteratorLevelOrder(self):
        root = self._itTestTree()
        it = iterator(root, TreeOrder.LEVEL_ORDER)
        self.assertIsNotNone(it)
        expanded = [node.internal_type for node in it]
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son2', 'son1_1', 'son1_2', 'son2_1', 'son2_2'])

    def testIteratorPositionOrder(self):
        # POSITION_ORDER sorts by the start offsets assigned in _itTestTree.
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POSITION_ORDER)
        self.assertIsNotNone(it)
        expanded = [node.internal_type for node in it]
        self.assertListEqual(
            expanded,
            ['root', 'son1', 'son2_1', 'son1_1', 'son1_2', 'son2_2', 'son2'])

    def _validate_resp(self, resp):
        self.assertIsNotNone(resp)
        self.assertEqual(
            type(resp).DESCRIPTOR.full_name,
            ParseResponse.DESCRIPTOR.full_name)
        self.assertEqual(len(resp.errors), 0)
        # self.assertIsInstance() does not work - must be some metaclass magic
        # self.assertIsInstance(resp.uast, Node)

        # Sometimes its fully qualified, sometimes is just "Node"... ditto
        self.assertTrue(resp.uast.__class__.__name__.endswith('Node'))

    def testFilterInsideIter(self):
        # Filtering must be safe while an iterator over the same tree is live.
        root = self.client.parse(__file__).uast
        it = iterator(root, TreeOrder.PRE_ORDER)
        self.assertIsNotNone(it)
        for n in it:
            filter(n, "//*[@roleIdentifier]")

    def testItersMixingIterations(self):
        # A nested iterator started from a node must not disturb the outer one.
        root = self.client.parse(__file__).uast
        it = iterator(root, TreeOrder.PRE_ORDER)
        next(it)
        next(it)
        next(it)
        n = next(it)
        it2 = iterator(n, TreeOrder.PRE_ORDER)
        next(it2)
        assert (next(it) == next(it2))

    def testManyFilters(self):
        root = self.client.parse(__file__).uast
        root.properties['k1'] = 'v2'
        root.properties['k2'] = 'v1'

        import resource
        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(100):
            filter(root, "//*[@roleIdentifier]")
        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled after running the filter
        self.assertLess(after[2] / before[2], 2.0)

    def _validate_filter(self, resp):
        # The expected tokens are the first imports of this very test file
        # (it parses __file__) — keep them in sync with the module imports.
        results = filter(resp.uast,
                         "//Import[@roleImport and @roleDeclaration]//alias")
        self.assertEqual(next(results).token, "os")
        self.assertEqual(next(results).token, "unittest")
        self.assertEqual(next(results).token, "docker")
Beispiel #8
0
class BblfshTests(unittest.TestCase):
    """Integration tests for the context-based (ResultContext) bblfsh client API."""

    # Set by setUpClass: truthy when a bblfshd server was already running,
    # so tearDownClass knows whether it owns the container.
    BBLFSH_SERVER_EXISTED = None
    # Fixture files parsed by most tests; the magic offsets/lines/cols asserted
    # below are tied to the exact contents of these files.
    fixtures_pyfile = "fixtures/test.py"
    fixtures_cfile = "fixtures/test.c"

    @classmethod
    def setUpClass(cls: t.Any) -> None:
        cls.BBLFSH_SERVER_EXISTED = ensure_bblfsh_is_running()

    @classmethod
    def tearDownClass(cls: t.Any) -> None:
        # Remove the bblfshd container only if this test run started it itself.
        if not cls.BBLFSH_SERVER_EXISTED:
            client = docker.from_env(version="auto")
            client.containers.get("bblfshd").remove(force=True)
            client.api.close()

    def setUp(self) -> None:
        self.client = BblfshClient("localhost:9432")

    def _parse_fixture(self) -> ResultContext:
        # Parse the Python fixture and sanity-check the returned context.
        ctx = self.client.parse(self.fixtures_pyfile)
        self._validate_ctx(ctx)
        return ctx

    def testVersion(self) -> None:
        version = self.client.version()
        self.assertTrue(hasattr(version, "version"))
        self.assertTrue(version.version)
        self.assertTrue(hasattr(version, "build"))
        self.assertTrue(version.build)

    def testNativeParse(self) -> None:
        ctx = self.client.parse(self.fixtures_pyfile, mode=Modes.NATIVE)
        self._validate_ctx(ctx)
        self.assertIsNotNone(ctx)

        it = ctx.filter("//*[@ast_type='NoopLine']")
        self.assertIsNotNone(it)
        self.assertIsInstance(it, NodeIterator)
        res = list(it)
        self.assertGreater(len(res), 1)
        for i in res:
            t = i.get_dict().get("ast_type")
            self.assertIsNotNone(t)
            self.assertEqual(t, "NoopLine")

    def testNonUTF8ParseError(self) -> None:
        # b"\x80" is not valid UTF-8, so the server must reject the contents.
        self.assertRaises(NonUTF8ContentException,
                          self.client.parse, "", "Python", b"a = '\x80abc'")

    def testUASTDefaultLanguage(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(ctx.language, "python")

    def testUASTWithLanguage(self) -> None:
        ctx = self.client.parse(self.fixtures_pyfile, language="Python")
        self._validate_ctx(ctx)
        self.assertEqual(ctx.language, "python")

    def testUASTWithLanguageAlias(self) -> None:
        ctx = self.client.parse(self.fixtures_cfile)
        self._validate_ctx(ctx)
        self.assertEqual(ctx.language, "c")

        it = ctx.filter("//uast:FunctionGroup/Nodes/uast:Alias/Name/uast:Identifier/Name")
        self.assertIsInstance(it, NodeIterator)

        # Function names defined in fixtures/test.c, in document order.
        self.assertEqual(next(it).get(), "main")
        self.assertEqual(next(it).get(), "fib")


    def testUASTFileContents(self) -> None:
        with open(self.fixtures_pyfile, "r") as fin:
            contents = fin.read()

        ctx = self.client.parse("file.py", contents=contents)
        self._validate_ctx(ctx)

        def assert_strnode(n: Node, expected: str) -> None:
            # Both the generic get() and the typed get_str() must agree.
            self.assertEqual(n.get(), expected)
            self.assertIsInstance(n.get_str(), str)
            self.assertEqual(n.get_str(), expected)

        it = ctx.filter("//uast:RuntimeImport/Path/uast:Identifier/Name")
        self.assertIsInstance(it, NodeIterator)

        # Imports of fixtures/test.py, in document order.
        assert_strnode(next(it), "os")
        assert_strnode(next(it), "resource")
        assert_strnode(next(it), "unittest")
        assert_strnode(next(it), "docker")
        assert_strnode(next(it), "bblfsh")
        assert_strnode(next(it), "bblfsh")
        self.assertRaises(StopIteration, next, it)

    def testBrokenFilter(self) -> None:
        ctx = self._parse_fixture()

        self.assertRaises(RuntimeError, ctx.filter, "dsdfkj32423#$@#$")

    def testFilterToken(self):
        ctx = self._parse_fixture()
        it = ctx.filter("//*[@token='else']/text()")
        first = next(it).get_str()
        self.assertEqual(first, "else")

    def testFilterRoles(self) -> None:
        ctx = self._parse_fixture()
        it = ctx.filter("//*[@role='Identifier']")
        self.assertIsInstance(it, NodeIterator)

        l = list(it)
        self.assertGreater(len(l), 0)

        # 'Friend' is not a role that appears in the Python fixture.
        it = ctx.filter("//*[@role='Friend']")
        self.assertIsInstance(it, NodeIterator)
        l = list(it)
        self.assertEqual(len(l), 0)

    def testFilterProperties(self) -> None:
        # Filtering also works over plain Python dicts via a fresh uast() context.
        ctx = uast()
        obj = {"k1": "v1", "k2": "v2"}
        self.assertTrue(any(ctx.filter("/*[@k1='v1']", obj)))
        self.assertTrue(any(ctx.filter("/*[@k2='v2']", obj)))
        self.assertFalse(any(ctx.filter("/*[@k2='v1']", obj)))
        self.assertFalse(any(ctx.filter("/*[@k1='v2']", obj)))

    def testFilterStartOffset(self) -> None:
        # The magic numbers in the position tests match fixtures/test.py exactly.
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=11749]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@offset=99999]")))

    def testFilterStartLine(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@line=295]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@line=99999]")))

    def testFilterStartCol(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/start/uast:Position[@col=42]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/start/uast:Position[@col=99999]")))

    def testFilterEndOffset(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=11757]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@offset=99999]")))

    def testFilterEndLine(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@line=321]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@line=99999]")))

    def testFilterEndCol(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(any(ctx.filter("//uast:Positions/end/uast:Position[@col=49]")))
        self.assertFalse(any(ctx.filter("//uast:Positions/end/uast:Position[@col=99999]")))

    def testFilterBool(self) -> None:
        ctx = self._parse_fixture()
        self.assertTrue(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])"))
        self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get())
        self.assertTrue(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=49])")).get_bool())

        self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get())
        self.assertFalse(next(ctx.filter("boolean(//uast:Positions/end/uast:Position[@col=9999])")).get_bool())

    def testFilterNumber(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get(), 2)
        self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_int(), 2)
        self.assertEqual(next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])")).get_float(), 2.0)

    def testFilterString(self) -> None:
        ctx = self._parse_fixture()
        self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get(), "uast:Positions")
        self.assertEqual(next(ctx.filter("name(//uast:Positions)")).get_str(), "uast:Positions")

    def testFilterBadQuery(self) -> None:
        ctx = uast()
        self.assertRaises(RuntimeError, ctx.filter, "//[@roleModule]", {})

    def testFilterBadType(self) -> None:
        # Asking a numeric result node for a string must raise a typed-get error.
        ctx = self._parse_fixture()
        res = next(ctx.filter("count(//uast:Positions/end/uast:Position[@col=49])"))
        self.assertRaises(NodeTypedGetException, res.get_str)

    def testRoleIdName(self) -> None:
        # role_id and role_name must be inverse mappings of each other.
        self.assertEqual(role_id(role_name(1)), 1)
        self.assertEqual(role_name(role_id("IDENTIFIER")),  "IDENTIFIER")

    @staticmethod
    def _itTestTree() -> dict:
        """Build a small synthetic dict-based UAST with hand-picked positions for order tests."""
        def set_position(node: dict, start_offset: int, start_line: int, start_col: int,
                         end_offset: int, end_line: int, end_col: int) -> None:
            # Attach a uast:Positions annotation in the canonical "@pos" shape.
            node["@pos"] = {
                "@type": "uast:Positions",
                "start": {
                    "@type": "uast:Position",
                    "offset": start_offset,
                    "line": start_line,
                    "col": start_col
                },
                "end": {
                    "@type": "uast:Position",
                    "offset": end_offset,
                    "line": end_line,
                    "col": end_col
                }
            }

        root = {"@type": "root"}
        set_position(root, 0,1,1, 1,1,2)

        son1 = {"@type": "son1"}
        set_position(son1, 2,2,2, 3,2,3)

        son1_1 = {"@type": "son1_1"}
        set_position(son1_1, 10,10,1, 12,2,2)

        son1_2 = {"@type": "son1_2"}
        set_position(son1_2, 10,10,1, 12,2,2)

        son1["children"] = [son1_1, son1_2]

        son2 = {"@type": "son2"}
        set_position(son2, 100,100,1,  101,100,2)

        son2_1 = {"@type": "son2_1"}
        set_position(son2_1, 5,5,1, 6,5,2)

        son2_2 = {"@type": "son2_2"}
        set_position(son2_2, 15,15,1, 16,15,2)

        son2["children"] = [son2_1, son2_2]
        root["children"] = [son1, son2]

        return root

    @staticmethod
    def _get_nodetypes(iterator: NodeIterator) -> t.List[str]:
        # Keep only dict nodes; positions/leaf values are skipped.
        return [n["@type"] for n in
                filter(lambda x: isinstance(x, dict), iterator)]

    @staticmethod
    def _get_nodes(iterator: NodeIterator) -> t.List[dict]:
        return [n.get() for n in iterator]

    @staticmethod
    def _get_positions(iterator: NodeIterator):
        # Collect (offset, line, col) triples of the start positions of the
        # visited nodes that carry a "@pos"/"start" annotation.
        startPositions = [ n["@pos"]["start"] for n in
                           filter(lambda x: isinstance(x, dict) and
                                  "@pos" in x.keys() and
                                  "start" in x["@pos"].keys(), iterator) ]
        return [ (int(n["offset"]), int(n["line"]), int(n["col"])) for n in startPositions ]

    def decrefAndGC(self, obj) -> None:
        # Drop the local reference and force a collection so the underlying
        # native context can be finalized if nothing else holds it.
        del obj
        gc.collect()

    def testIteratorPreOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.PRE_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2',
                                        'son2', 'son2_1', 'son2_2'])

    def testIteratorPostOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POST_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1',
                                        'son2_2', 'son2', 'root'])

    def testIteratorLevelOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.LEVEL_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1',
                                        'son1_2', 'son2_1', 'son2_2'])

    def testIteratorPositionOrder(self) -> None:
        # Check first our homemade tree
        root = self._itTestTree()
        it = iterator(root, TreeOrder.POSITION_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1',
                                        'son1_2', 'son2_2', 'son2'])
        # Check that when using the positional order the positions we get are
        # in fact sorted by (offset, line, col)
        it = iterator(root, TreeOrder.POSITION_ORDER)
        positions = self._get_positions(it)
        self.assertListEqual(positions, [(0,1,1), (2,2,2), (5,5,1), (10,10,1),
                                         (10,10,1), (15,15,1), (100,100,1)])

    def testAnyOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.ANY_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        # We only can test that the order gives us all the nodes
        self.assertEqual(set(expanded), {'root', 'son1', 'son2', 'son1_1',
                                         'son1_2', 'son2_1', 'son2_2'})

    def testChildrenOrder(self) -> None:
        root = self._itTestTree()
        it = iterator(root, TreeOrder.CHILDREN_ORDER)
        self.assertIsNotNone(it)
        expanded = self._get_nodetypes(it)
        # We only can test that the order gives us all the nodes
        self.assertEqual(expanded, ['son1', 'son2'])

    # Iterating from the root node should give the same result as
    # iterating from the tree, for every available node
    def testNodeIteratorEqualsCtxIterator(self) -> None:
        ctx = self._parse_fixture()
        root = ctx.root

        for order in TreeOrder:
            itCtx  = ctx.iterate(order)
            itRoot = root.iterate(order)
            self.assertListEqual(self._get_nodes(itCtx), self._get_nodes(itRoot))

    def _validate_ctx(self, ctx: ResultContext) -> None:
        self.assertIsNotNone(ctx)
        self.assertIsInstance(ctx, ResultContext)
        self.assertIsInstance(ctx.uast, Node)

    def testFilterInsideIter(self) -> None:
        # Filtering with a second context must be safe while iterating the first.
        ctx = self._parse_fixture()
        c2 = uast()
        for n in ctx.iterate(TreeOrder.PRE_ORDER):
            c2.filter("//uast:Positions", n)

    def testItersMixingIterations(self) -> None:
        # A nested iterator started from a node must stay in sync with the outer one.
        ctx = self._parse_fixture()

        it = ctx.iterate(TreeOrder.PRE_ORDER)
        next(it); next(it); next(it); next(it)

        it2 = it.iterate(TreeOrder.PRE_ORDER)
        next(it2)

        a = next(it).get()
        b = next(it2).get()
        self.assertEqual(a, b)

    def testManyFilters(self) -> None:
        ctx = self._parse_fixture()

        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(10000):
            ctx.filter("//*[@role='Identifier']")

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testManyParses(self) -> None:
        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(100):
            self.client.parse(self.fixtures_pyfile)

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testManyParsesAndFilters(self) -> None:
        before = resource.getrusage(resource.RUSAGE_SELF)
        for _ in range(100):
            ctx = self.client.parse(self.fixtures_pyfile)
            ctx.filter("//*[@role='Identifier']")

        after = resource.getrusage(resource.RUSAGE_SELF)

        # Check that memory usage has not doubled
        self.assertLess(after[2] / before[2], 2.0)

    def testSupportedLanguages(self) -> None:
        res = self.client.supported_languages()
        self.assertGreater(len(res), 0)
        for l in res:
            for key in ('language', 'version', 'status', 'features'):
                self.assertTrue(hasattr(l, key))
                self.assertIsNotNone(getattr(l, key))

    def testEncode(self) -> None:
        ctx = self._parse_fixture()
        # This test is here for backward compatibility purposes,
        # in case someone was relying on encoding contexts this way
        self.assertEqual(ctx.ctx.encode(None, 0), ctx._response.uast)
        self.assertEqual(ctx.encode(), ctx._response.uast)

    def testEncodeWithEmptyContext(self) -> None:
        ctx = ResultContext()
        obj = {"k1": "v1", "k2": "v2"}
        fmt = 1 # YAML

        # This test is here for backward compatibility purposes,
        # in case someone was relying on encoding contexts this way
        data = ctx.ctx.encode(obj, fmt)
        other_data = ctx.encode(obj, fmt)
        # Encoding then decoding must round-trip the original dict.
        self.assertDictEqual(obj, decode(data, format = fmt).load())
        self.assertDictEqual(obj, decode(other_data, format = fmt).load())

    def testGetAll(self) -> None:
        ctx = self._parse_fixture()

        # Expected import names of fixtures/test.py, in document order.
        expected = ["os", "resource", "unittest", "docker", "bblfsh"]
        actual = []
        for k in ctx.get_all()["body"]:
            if "@type" in k and k["@type"] == "uast:RuntimeImport" and "Path" in k:
                path = k["Path"]
                if "Name" in path:
                    actual.append(k["Path"]["Name"])

        self.assertListEqual(expected, actual)

    def testLoad(self) -> None:
        ctx = self._parse_fixture()

        it = ctx.iterate(TreeOrder.PRE_ORDER)
        next(it); next(it); next(it); next(it)

        it2 = it.iterate(TreeOrder.PRE_ORDER)
        n = next(it2)
        node_ext = n.node_ext

        # load() materializes the external node as a plain Python dict.
        obj = node_ext.load()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

        path = obj["Path"]
        self.assertEqual("uast:Identifier", path["@type"])
        self.assertEqual("os", path["Name"])

    # The following testOrphan{x} methods verifies that iterators and nodes work
    # correctly once the context they come from has been DECREFed. Loading an
    # (external) node and filtering it after the context / iterators have been
    # DECREFed are also checked. As an example, the following code should work
    # in Python:
    #
    # its = []
    # for file in files:
    #    ctx = client.parse(file)
    #    it = ctx.filter("blablablah")
    #    its.append(it)
    #
    # it = pick a it from its
    # node = next(it)
    #
    # Instead of testing with a while, we can just delete ctx before doing
    # something with the iterator
    def testOrphanFilter(self) -> None:
        ctx = self._parse_fixture()
        it = ctx.filter("//uast:RuntimeImport")
        self.decrefAndGC(ctx)
        # We should be able to retrieve values from the iterator
        # after the context has been DECREFed but the iterator
        # still exists
        obj = next(it).get()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

        # Chaining calls has the same effect as splitting
        # the effect across different lines as above
        self.decrefAndGC(it)
        it = self._parse_fixture().filter("//uast:RuntimeImport")
        next(it)
        obj = next(it).get()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

    def testOrphanIterator(self) -> None:
        ctx = self._parse_fixture()
        it = ctx.iterate(TreeOrder.PRE_ORDER)
        self.decrefAndGC(ctx)
        # We should be able to retrieve values from the iterator
        # after the context has been DECREFed but the iterator
        # still exists
        obj = next(it).get()
        self.assertIsInstance(obj, dict)

        # Chaining calls has the same effect as splitting
        # the effect across different lines as above
        self.decrefAndGC(it)
        it = self._parse_fixture().iterate(TreeOrder.POST_ORDER)
        obj = next(it)
        self.assertIsInstance(obj, Node)

    def testLoadOrphanNode(self) -> None:
        ctx = self._parse_fixture()
        it = ctx.iterate(TreeOrder.PRE_ORDER)
        # The underlying ctx should not be deallocated even if ctx goes
        # out of scope because the iterator is still alive
        self.decrefAndGC(ctx)
        next(it); next(it); next(it);
        node = next(it)
        self.decrefAndGC(it)
        # Context should not have been deallocated yet because we
        # want to iterate from the node onwards
        it2 = node.iterate(TreeOrder.PRE_ORDER)
        node_ext = node.node_ext
        # node could be deallocated here also, if we by, any chance,
        # we happen to be storing only the external nodes
        self.decrefAndGC(node)
        obj = node_ext.load()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

    def testFilterOrphanNode(self) -> None:
        ctx = self._parse_fixture()
        root = ctx.root
        self.decrefAndGC(ctx)
        # filter should work here over the tree even if we ctx has
        # been DECREFed by the interpreter (it has gone out of scope)
        it = root.filter("//uast:RuntimeImport")
        obj = next(it).get()
        typ = obj["@type"]
        self.assertEqual("uast:RuntimeImport", typ)

    def testPythonContextIterate(self) -> None:
        # C++ memory context
        ctxC = self._parse_fixture()
        # Python memory context
        pyDict = ctxC.root.get()
        ctxPy = bblfsh.context(pyDict)

        # Both backends must produce the same node sequence for every order.
        for treeOrder in TreeOrder:
            itC = ctxC.iterate(treeOrder)
            itPy = ctxPy.iterate(treeOrder)

            for nodeC, nodePy in zip(itC, itPy):
                self.assertEqual(nodeC.get(), nodePy)

    def testPythonContextFilter(self) -> None:
        # C++ memory context
        ctxC = self._parse_fixture()
        # Python memory context
        pyDict = ctxC.root.get()
        ctxPy = bblfsh.context(pyDict)

        itC = ctxC.filter("//*[@role='Identifier']")
        itPy = ctxPy.filter("//*[@role='Identifier']")

        for nodeC, nodePy in zip(itC, itPy):
            self.assertEqual(nodeC.get(), nodePy)

    def testBinaryEncodeDecodePythonContext(self) -> None:
        # Binary encoding should be invertible
        # C++ memory context
        ctxC = self._parse_fixture()
        # Python memory context
        pyDict = ctxC.root.get()
        ctxPy = bblfsh.context(pyDict)
        encoded = ctxPy.encode(fmt = 0) # Binary encoding
        decoded = decode(encoded, format = 0)

        self.assertEqual(pyDict, decoded.load())

    def testInvalidDecodeBytes(self) -> None:
        # Garbage byte strings must be rejected by the binary decoder.
        with self.assertRaises(RuntimeError):
            decode(b'', format = 0)
        with self.assertRaises(RuntimeError):
            decode(b'abcdef', format = 0)
Beispiel #9
0
class BblfshTests(unittest.TestCase):
    """Integration tests against a bblfshd server listening on 0.0.0.0:9432."""

    # Whether a bblfshd server was already running before this suite started;
    # set in setUpClass and checked by tearDownClass to decide on cleanup.
    BBLFSH_SERVER_EXISTED = None

    @classmethod
    def setUpClass(cls):
        """Ensure a bblfshd instance is reachable before any test runs."""
        already_running = ensure_bblfsh_is_running()
        # Remember whether we started the server, so teardown knows
        # whether it owns the container.
        cls.BBLFSH_SERVER_EXISTED = already_running

    @classmethod
    def tearDownClass(cls):
        """Remove the bblfshd container, but only if this suite started it.

        Fix: the docker API connection was leaked when container removal
        raised; ``close()`` now runs in a ``finally`` block.
        """
        if cls.BBLFSH_SERVER_EXISTED:
            # The server pre-existed this suite; leave it running.
            return
        client = docker.from_env(version="auto")
        try:
            client.containers.get("bblfshd").remove(force=True)
        finally:
            # Always release the docker API connection, even if removal fails.
            client.api.close()

    def setUp(self):
        """Connect a fresh client to the local bblfshd endpoint."""
        endpoint = "0.0.0.0:9432"
        self.client = BblfshClient(endpoint)

    def testVersion(self):
        """The server must report non-empty version and build metadata."""
        info = self.client.version()
        for attr in ("version", "build"):
            self.assertTrue(hasattr(info, attr))
            self.assertTrue(getattr(info, attr))

    # def testNativeParse(self):
    #     reply = self.client.native_parse(__file__)
    #     assert(reply.ast)
    #
    def testNonUTF8ParseError(self):
        """Parsing non-UTF-8 content must raise NonUTF8ContentException."""
        with self.assertRaises(NonUTF8ContentException):
            self.client.parse("", "Python", b"a = '\x80abc'")

    #
    def testUASTDefaultLanguage(self):
        """Language auto-detection must successfully parse this very file."""
        ctx = self.client.parse(__file__)
        self._validate_ctx(ctx)

    def testUASTPython(self):
        """Explicitly requesting Python must yield a python-language context."""
        result = self.client.parse(__file__, language="Python")
        self._validate_ctx(result)
        self.assertEqual("python", result.language)

    def testUASTFileContents(self):
        """Parsing explicit byte contents must produce a filterable context."""
        with open(__file__, "rb") as source:
            raw = source.read()
        ctx = self.client.parse("file.py", contents=raw)
        self._validate_ctx(ctx)
        self._validate_filter(ctx)

    #
    # def testBrokenFilter(self):
    #     self.assertRaises(RuntimeError, filter, 0, "foo")
    #
    # def testFilterInternalType(self):
    #     node = Node()
    #     node.internal_type = 'a'
    #     self.assertTrue(any(filter(node, "//a")))
    #     self.assertFalse(any(filter(node, "//b")))
    #
    # def testFilterToken(self):
    #     node = Node()
    #     node.token = 'a'
    #     self.assertTrue(any(filter(node, "//*[@token='a']")))
    #     self.assertFalse(any(filter(node, "//*[@token='b']")))
    #
    # def testFilterRoles(self):
    #     node = Node()
    #     node.roles.append(1)
    #     self.assertTrue(any(filter(node, "//*[@roleIdentifier]")))
    #     self.assertFalse(any(filter(node, "//*[@roleQualified]")))
    #
    # def testFilterProperties(self):
    #     node = Node()
    #     node.properties['k1'] = 'v2'
    #     node.properties['k2'] = 'v1'
    #     self.assertTrue(any(filter(node, "//*[@k2='v1']")))
    #     self.assertTrue(any(filter(node, "//*[@k1='v2']")))
    #     self.assertFalse(any(filter(node, "//*[@k1='v1']")))
    #
    # def testFilterStartOffset(self):
    #     node = Node()
    #     node.start_position.offset = 100
    #     self.assertTrue(any(filter(node, "//*[@startOffset=100]")))
    #     self.assertFalse(any(filter(node, "//*[@startOffset=10]")))
    #
    # def testFilterStartLine(self):
    #     node = Node()
    #     node.start_position.line = 10
    #     self.assertTrue(any(filter(node, "//*[@startLine=10]")))
    #     self.assertFalse(any(filter(node, "//*[@startLine=100]")))
    #
    # def testFilterStartCol(self):
    #     node = Node()
    #     node.start_position.col = 50
    #     self.assertTrue(any(filter(node, "//*[@startCol=50]")))
    #     self.assertFalse(any(filter(node, "//*[@startCol=5]")))
    #
    # def testFilterEndOffset(self):
    #     node = Node()
    #     node.end_position.offset = 100
    #     self.assertTrue(any(filter(node, "//*[@endOffset=100]")))
    #     self.assertFalse(any(filter(node, "//*[@endOffset=10]")))
    #
    # def testFilterEndLine(self):
    #     node = Node()
    #     node.end_position.line = 10
    #     self.assertTrue(any(filter(node, "//*[@endLine=10]")))
    #     self.assertFalse(any(filter(node, "//*[@endLine=100]")))
    #
    # def testFilterEndCol(self):
    #     node = Node()
    #     node.end_position.col = 50
    #     self.assertTrue(any(filter(node, "//*[@endCol=50]")))
    #     self.assertFalse(any(filter(node, "//*[@endCol=5]")))
    #
    # def testFilterBool(self):
    #     node = Node()
    #     self.assertTrue(filter_bool(node, "boolean(//*[@startOffset or @endOffset])"))
    #     self.assertFalse(filter_bool(node, "boolean(//*[@blah])"))
    #
    # def testFilterNumber(self):
    #     node = Node()
    #     node.children.extend([Node(), Node(), Node()])
    #     self.assertEqual(int(filter_number(node, "count(//*)")), 4)
    #
    # def testFilterString(self):
    #     node = Node()
    #     node.internal_type = "test"
    #     self.assertEqual(filter_string(node, "name(//*[1])"), "test")
    #
    # def testFilterBadQuery(self):
    #     node = Node()
    #     self.assertRaises(RuntimeError, filter, node, "//*roleModule")
    #
    # def testFilterBadType(self):
    #     node = Node()
    #     node.end_position.col = 50
    #     self.assertRaises(RuntimeError, filter, node, "boolean(//*[@startPosition or @endPosition])")
    #
    # def testRoleIdName(self):
    #     self.assertEqual(role_id(role_name(1)), 1)
    #     self.assertEqual(role_name(role_id("IDENTIFIER")),  "IDENTIFIER")
    #
    # def _itTestTree(self):
    #     root = Node()
    #     root.internal_type = 'root'
    #     root.start_position.offset = 0
    #     root.start_position.line = 0
    #     root.start_position.col = 1
    #
    #     son1 = Node()
    #     son1.internal_type = 'son1'
    #     son1.start_position.offset = 1
    #
    #     son1_1 = Node()
    #     son1_1.internal_type = 'son1_1'
    #     son1_1.start_position.offset = 10
    #
    #     son1_2 = Node()
    #     son1_2.internal_type = 'son1_2'
    #     son1_2.start_position.offset = 10
    #
    #     son1.children.extend([son1_1, son1_2])
    #
    #     son2 = Node()
    #     son2.internal_type = 'son2'
    #     son2.start_position.offset = 100
    #
    #     son2_1 = Node()
    #     son2_1.internal_type = 'son2_1'
    #     son2_1.start_position.offset = 5
    #
    #     son2_2 = Node()
    #     son2_2.internal_type = 'son2_2'
    #     son2_2.start_position.offset = 15
    #
    #     son2.children.extend([son2_1, son2_2])
    #     root.children.extend([son1, son2])
    #
    #     return root
    #
    # def testIteratorPreOrder(self):
    #     root = self._itTestTree()
    #     it = iterator(root, TreeOrder.PRE_ORDER)
    #     self.assertIsNotNone(it)
    #     expanded = [node.internal_type for node in it]
    #     self.assertListEqual(expanded, ['root', 'son1', 'son1_1', 'son1_2',
    #                                     'son2', 'son2_1', 'son2_2'])
    #
    # def testIteratorPostOrder(self):
    #     root = self._itTestTree()
    #     it = iterator(root, TreeOrder.POST_ORDER)
    #     self.assertIsNotNone(it)
    #     expanded = [node.internal_type for node in it]
    #     self.assertListEqual(expanded, ['son1_1', 'son1_2', 'son1', 'son2_1',
    #                                     'son2_2', 'son2', 'root'])
    #
    # def testIteratorLevelOrder(self):
    #     root = self._itTestTree()
    #     it = iterator(root, TreeOrder.LEVEL_ORDER)
    #     self.assertIsNotNone(it)
    #     expanded = [node.internal_type for node in it]
    #     self.assertListEqual(expanded, ['root', 'son1', 'son2', 'son1_1',
    #                                     'son1_2', 'son2_1', 'son2_2'])
    #
    # def testIteratorPositionOrder(self):
    #     root = self._itTestTree()
    #     it = iterator(root, TreeOrder.POSITION_ORDER)
    #     self.assertIsNotNone(it)
    #     expanded = [node.internal_type for node in it]
    #     self.assertListEqual(expanded, ['root', 'son1', 'son2_1', 'son1_1',
    #                                     'son1_2', 'son2_2', 'son2'])
    #
    def _validate_ctx(self, ctx):
        """Check that *ctx* is a non-None ResultContext with a bytes UAST."""
        import bblfsh
        self.assertIsNotNone(ctx)
        expected_type = bblfsh.result_context.ResultContext
        self.assertIsInstance(ctx, expected_type)
        self.assertIsInstance(ctx.uast, bytes)

    # def testFilterInsideIter(self):
    #     root = self.client.parse(__file__).uast
    #     it = iterator(root, TreeOrder.PRE_ORDER)
    #     self.assertIsNotNone(it)
    #     for n in it:
    #         filter(n, "//*[@roleIdentifier]")
    #
    # def testItersMixingIterations(self):
    #     root = self.client.parse(__file__).uast
    #     it = iterator(root, TreeOrder.PRE_ORDER)
    #     next(it); next(it); next(it)
    #     n = next(it)
    #     it2 = iterator(n, TreeOrder.PRE_ORDER)
    #     next(it2)
    #     assert(next(it) == next(it2))
    #
    # def testManyFilters(self):
    #     root = self.client.parse(__file__).uast
    #     root.properties['k1'] = 'v2'
    #     root.properties['k2'] = 'v1'
    #
    #     before = resource.getrusage(resource.RUSAGE_SELF)
    #     for _ in range(500):
    #         filter(root, "//*[@roleIdentifier]")
    #
    #     after = resource.getrusage(resource.RUSAGE_SELF)
    #
    #     # Check that memory usage has not doubled after running the filter
    #     self.assertLess(after[2] / before[2], 2.0)
    #
    # def testManyParses(self):
    #     before = resource.getrusage(resource.RUSAGE_SELF)
    #     for _ in range(100):
    #         root = self.client.parse(__file__).uast
    #         root.properties['k1'] = 'v2'
    #         root.properties['k2'] = 'v1'
    #
    #     after = resource.getrusage(resource.RUSAGE_SELF)
    #
    #     # Check that memory usage has not doubled after running the parse+filter
    #     self.assertLess(after[2] / before[2], 2.0)
    #
    # def testManyParsersAndFilters(self):
    #     before = resource.getrusage(resource.RUSAGE_SELF)
    #     for _ in range(100):
    #         root = self.client.parse(__file__).uast
    #         root.properties['k1'] = 'v2'
    #         root.properties['k2'] = 'v1'
    #
    #         filter(root, "//*[@roleIdentifier]")
    #
    #     after = resource.getrusage(resource.RUSAGE_SELF)
    #
    #     # Check that memory usage has not doubled after running the parse+filter
    #     self.assertLess(after[2] / before[2], 2.0)
    #
    # def testSupportedLanguages(self):
    #     res = self.client.supported_languages()
    #     self.assertGreater(len(res), 0)
    #     for l in res:
    #         for key in ('language', 'version', 'status', 'features'):
    #             print(key)
    #             self.assertTrue(hasattr(l, key))
    #             self.assertIsNotNone(getattr(l, key))

    def _validate_filter(self, ctx):
        """Run an XPath query over *ctx* and check the imported module names.

        :param ctx: parse result context for this very test file.
        """
        def assert_strnode(n: Node, expected: str) -> None:
            # The generic and string-typed accessors must agree.
            self.assertEqual(n.get(), expected)
            self.assertIsInstance(n.get_str(), str)
            self.assertEqual(n.get_str(), expected)

        it = ctx.filter(
            "//uast:RuntimeImport/Path/uast:Alias/Name/uast:Identifier/Name")
        self.assertIsInstance(it, NodeIterator)

        assert_strnode(next(it), "os")
        assert_strnode(next(it), "resource")
        assert_strnode(next(it), "unittest")
        assert_strnode(next(it), "docker")
        assert_strnode(next(it), "bblfsh")
        # Fix: pass the callable and its argument separately. The original
        # `self.assertRaises(StopIteration, next(it))` evaluated next(it)
        # eagerly, so StopIteration escaped before assertRaises could catch it.
        self.assertRaises(StopIteration, next, it)