def testHeadAndTail(self):
    """Exercise ``head``/``tail`` on the fixture frames and two engine edge cases.

    Covers, in order:

    * ``self.odps_df``: frame-level and column-level ``head``/``tail``, and
      that a filtered frame is reported as cached after ``head``.
    * ``self.pd_df``: the same checks against the second fixture frame.
    * A ``MixedEngine`` whose ODPS SQL engine always passes
      ``_force_tunnel=True`` down to ``_do_execute``.
    * ``head`` on a freshly created (hence empty) table.
    """
    # ---- self.odps_df ----------------------------------------------------
    odps_head = self.odps_df.head(2)
    self.assertEqual(len(odps_head), 2)

    odps_filtered = self.odps_df[self.odps_df['name'] == 'name1']
    odps_filtered_head = odps_filtered.head(1)
    self.assertEqual(len(odps_filtered_head), 1)
    # Running head() on the filtered expression must leave it cached.
    self.assertTrue(context.is_cached(odps_filtered))

    odps_tail = self.odps_df.tail(2)
    self.assertEqual(len(odps_tail), 2)
    self.assertTrue(all(row_id > 1 for row_id in odps_tail.values['id']))

    # Column-level head/tail.
    odps_name_head = self.odps_df.name.head(2)
    self.assertEqual(len(odps_name_head), 2)
    odps_name_tail = self.odps_df.name.tail(2)
    self.assertEqual(len(odps_name_tail), 2)

    # ---- self.pd_df ------------------------------------------------------
    pd_head = self.pd_df.head(1)
    self.assertEqual(len(pd_head), 1)

    pd_filtered = self.pd_df[self.pd_df['name'] == 'name1']
    pd_filtered_head = pd_filtered.head(1)
    self.assertEqual(len(pd_filtered_head), 1)
    self.assertTrue(context.is_cached(pd_filtered))

    pd_tail = self.pd_df.tail(1)
    self.assertEqual(len(pd_tail), 1)
    self.assertEqual(pd_tail.values['id'][0], 6)

    pd_name_head = self.pd_df.name.head(1)
    self.assertEqual(len(pd_name_head), 1)
    pd_name_tail = self.pd_df.name.tail(1)
    self.assertEqual(len(pd_name_tail), 1)

    # ---- engine that always forces the tunnel flag -----------------------
    class TunnelOnlyODPSEngine(ODPSSQLEngine):
        """SQL engine that injects ``_force_tunnel=True`` into every run."""

        def _do_execute(self, *args, **kwargs):
            kwargs.update(_force_tunnel=True)
            return super(TunnelOnlyODPSEngine, self)._do_execute(*args, **kwargs)

    mixed_engine = MixedEngine(self.odps)
    mixed_engine._odpssql_engine = TunnelOnlyODPSEngine(self.odps)

    tunnel_result = mixed_engine.execute(self.odps_df['id'], head=3)
    self.assertIsNotNone(tunnel_result)
    self.assertEqual(sum(tunnel_result.values['id']), 6)

    # ---- head() on an empty table ----------------------------------------
    empty_table_name = tn('pyodps_df_mixed2')
    self.odps.delete_table(empty_table_name, if_exists=True)
    # Reuse the schema of the first data source backing self.odps_df.
    source_table = next(self.odps_df.data_source())
    empty_table = self.odps.create_table(empty_table_name, source_table.schema)
    try:
        empty_head = DataFrame(empty_table).head(10)
        self.assertEqual(len(empty_head), 0)
    finally:
        # Drop the scratch table whether or not the assertions passed.
        empty_table.drop()
def testHeadAndTail(self):
    """Exercise ``head``/``tail`` on the fixture frames and two engine edge cases.

    Covers, in order:

    * ``self.odps_df``: frame-level and column-level ``head``/``tail``, and
      that a filtered frame has ``_cache_data`` set after ``head``.
    * ``self.pd_df``: the same checks against the second fixture frame.
    * A ``MixedEngine`` whose ODPS engine's ``execute`` only runs
      ``_pre_process`` followed by ``_handle_cases``.
    * ``head`` on a freshly created (hence empty) table.
    """
    # ---- self.odps_df ----------------------------------------------------
    odps_head = self.odps_df.head(2)
    self.assertEqual(len(odps_head), 2)

    odps_filtered = self.odps_df[self.odps_df["name"] == "name1"]
    odps_filtered_head = odps_filtered.head(1)
    self.assertEqual(len(odps_filtered_head), 1)
    # Running head() on the filtered expression must populate its cache slot.
    self.assertIsNotNone(odps_filtered._cache_data)

    odps_tail = self.odps_df.tail(2)
    self.assertEqual(len(odps_tail), 2)
    self.assertTrue(all(row_id > 1 for row_id in odps_tail.values["id"]))

    # Column-level head/tail.
    odps_name_head = self.odps_df.name.head(2)
    self.assertEqual(len(odps_name_head), 2)
    odps_name_tail = self.odps_df.name.tail(2)
    self.assertEqual(len(odps_name_tail), 2)

    # ---- self.pd_df ------------------------------------------------------
    pd_head = self.pd_df.head(1)
    self.assertEqual(len(pd_head), 1)

    pd_filtered = self.pd_df[self.pd_df["name"] == "name1"]
    pd_filtered_head = pd_filtered.head(1)
    self.assertEqual(len(pd_filtered_head), 1)
    self.assertIsNotNone(pd_filtered._cache_data)

    pd_tail = self.pd_df.tail(1)
    self.assertEqual(len(pd_tail), 1)
    self.assertEqual(pd_tail.values["id"][0], 6)

    pd_name_head = self.pd_df.name.head(1)
    self.assertEqual(len(pd_name_head), 1)
    pd_name_tail = self.pd_df.name.tail(1)
    self.assertEqual(len(pd_name_tail), 1)

    # ---- engine restricted to the _handle_cases path ---------------------
    class TunnelOnlyODPSEngine(ODPSEngine):
        """Engine whose ``execute`` stops after ``_handle_cases``."""

        def execute(self, expr, **kw):
            # Only the ``head`` keyword is forwarded; other keywords are ignored.
            processed = self._pre_process(expr)
            return self._handle_cases(processed, head=kw.get("head"))

    mixed_engine = MixedEngine(self.odps)
    mixed_engine._odpssql_engine = TunnelOnlyODPSEngine(
        self.odps, global_optimize=False
    )

    tunnel_result = mixed_engine.execute(self.odps_df["id"], head=3)
    self.assertIsNotNone(tunnel_result)
    self.assertEqual(sum(tunnel_result.values["id"]), 6)

    # ---- head() on an empty table ----------------------------------------
    empty_table_name = tn("pyodps_df_mixed2")
    self.odps.delete_table(empty_table_name, if_exists=True)
    # Reuse the schema of the first data source backing self.odps_df.
    source_table = next(self.odps_df.data_source())
    empty_table = self.odps.create_table(empty_table_name, source_table.schema)
    try:
        empty_head = DataFrame(empty_table).head(10)
        self.assertEqual(len(empty_head), 0)
    finally:
        # Drop the scratch table whether or not the assertions passed.
        empty_table.drop()