def test_extract_tokens(self):
    """Verify token extraction on the identifier nodes of the Python fixture.

    Wraps a DataFrame built from ``PYTHON_FILES`` / ``FILE_COLUMNS`` in a
    ``BlobsDataFrame``, extracts UASTs, keeps the nodes matched by the
    XPath query (``roleIdentifier`` set, ``roleIncomplete`` not set),
    extracts their tokens and checks the ``tokens`` column of the first
    row. The comparison uses ``assertCountEqual``, so element order is
    not significant but duplicates are.
    """
    identifier_query = '//*[@roleIdentifier and not(@roleIncomplete)]'
    expected_tokens = ["contents", "read", "f", "open", "f"]

    raw_frame = self.session.createDataFrame(PYTHON_FILES, FILE_COLUMNS)
    repositories = self.engine.repositories
    # Re-wrap the underlying JVM DataFrame, borrowing the session and
    # implicits handles from the engine's repositories DataFrame.
    blobs = BlobsDataFrame(
        raw_frame._jdf,
        repositories._session,
        repositories._implicits,
    )

    uasts = blobs.extract_uasts()
    identifier_nodes = uasts.query_uast(identifier_query)
    first_row = identifier_nodes.extract_tokens().first()

    self.assertCountEqual(first_row["tokens"], expected_tokens)
def test_uast_query(self):
    """Verify the nodes returned by an XPath query over the Python fixture.

    Wraps a DataFrame built from ``PYTHON_FILES`` / ``FILE_COLUMNS`` in a
    ``BlobsDataFrame``, extracts UASTs and runs the XPath query
    (``roleIdentifier`` set, ``roleIncomplete`` not set). Exactly one row
    must be collected; every entry in its ``result`` column is parsed
    with ``parse_uast_node`` and the resulting ``token`` values are
    compared to the expected list with ``assertCountEqual`` (order is
    not significant, duplicates are).
    """
    identifier_query = '//*[@roleIdentifier and not(@roleIncomplete)]'
    expected_tokens = ["contents", "read", "f", "open", "f"]

    raw_frame = self.session.createDataFrame(PYTHON_FILES, FILE_COLUMNS)
    repositories = self.engine.repositories
    # Re-wrap the underlying JVM DataFrame, borrowing the session and
    # implicits handles from the engine's repositories DataFrame.
    blobs = BlobsDataFrame(
        raw_frame._jdf,
        repositories._session,
        repositories._implicits,
    )

    matched_rows = blobs.extract_uasts().query_uast(identifier_query).collect()
    self.assertEqual(len(matched_rows), 1)

    # Flatten row -> serialized node -> parsed node token.
    identifier_tokens = [
        parse_uast_node(serialized_node).token
        for matched_row in matched_rows
        for serialized_node in matched_row["result"]
    ]
    self.assertCountEqual(identifier_tokens, expected_tokens)