Esempio n. 1
0
    def test_extract_tokens(self):
        """Check the tokens extracted from identifier nodes of the first row.

        Builds a ``BlobsDataFrame`` from the ``PYTHON_FILES`` fixture, extracts
        UASTs, filters them with an XPath query for complete identifier nodes,
        and asserts on the ``tokens`` column of the first resulting row
        (compared without regard to order).
        """
        raw_frame = self.session.createDataFrame(PYTHON_FILES, FILE_COLUMNS)
        engine_repos = self.engine.repositories
        # Re-wrap the plain DataFrame, reusing the session and implicits
        # carried by the engine's repositories DataFrame.
        blobs = BlobsDataFrame(
            raw_frame._jdf, engine_repos._session, engine_repos._implicits
        )

        uasts = blobs.extract_uasts()
        identifier_nodes = uasts.query_uast('//*[@roleIdentifier and not(@roleIncomplete)]')
        first_row = identifier_nodes.extract_tokens().first()

        expected_tokens = ["contents", "read", "f", "open", "f"]
        self.assertCountEqual(first_row["tokens"], expected_tokens)
Esempio n. 2
0
    def test_uast_query(self):
        """Check that the identifier XPath query returns the expected nodes.

        Builds a ``BlobsDataFrame`` from the ``PYTHON_FILES`` fixture, extracts
        UASTs, runs the XPath query for complete identifier nodes, and asserts
        that exactly one row comes back. Every node in each row's ``result``
        column is then parsed and its token compared (order-insensitively)
        against the expected identifiers.
        """
        raw_frame = self.session.createDataFrame(PYTHON_FILES, FILE_COLUMNS)
        engine_repos = self.engine.repositories
        # Re-wrap the plain DataFrame, reusing the session and implicits
        # carried by the engine's repositories DataFrame.
        blobs = BlobsDataFrame(
            raw_frame._jdf, engine_repos._session, engine_repos._implicits
        )

        identifier_query = '//*[@roleIdentifier and not(@roleIncomplete)]'
        matched_rows = blobs.extract_uasts().query_uast(identifier_query).collect()
        self.assertEqual(len(matched_rows), 1)

        # Each entry of "result" is parsed with parse_uast_node before its
        # token is read.
        tokens = []
        for record in matched_rows:
            tokens.extend(
                parse_uast_node(raw_node).token for raw_node in record["result"]
            )

        expected_tokens = ["contents", "read", "f", "open", "f"]
        self.assertCountEqual(tokens, expected_tokens)