def testLargeRowsFormatter(self):
    """Compare the rendering of a 1000-row ResultFrame for both ``pandas`` flags.

    The same rows and schema are wrapped twice, once with ``pandas=True`` and
    once with ``pandas=False``; the ``repr`` text and the ``_repr_html_``
    output of the two frames must be equal.
    """
    rows = [self._random_values() for _ in range(1000)]
    common = dict(data=rows, schema=self.schema)

    pandas_frame = ResultFrame(pandas=True, **common)
    plain_frame = ResultFrame(pandas=False, **common)

    # Plain-text representation.
    pandas_text = to_str(repr(pandas_frame))
    plain_text = to_str(repr(plain_frame))
    self.assertEqual(pandas_text, plain_text)

    # HTML representation.
    pandas_html = to_str(pandas_frame._repr_html_())
    plain_html = to_str(plain_frame._repr_html_())
    self.assertEqual(pandas_html, plain_html)
def testSmallRowsFormatter(self):
    """Compare the rendering of a 10-row ResultFrame for both ``pandas`` flags.

    The first field of the last row is set to ``None`` before the frames are
    built. Besides the ``repr`` / ``_repr_html_`` comparison, iterating the
    ``pandas=False`` frame must yield exactly its ``_values``.
    """
    rows = [self._random_values() for _ in range(10)]
    # Put a None into the data set: first field of the last row.
    rows[-1][0] = None
    common = dict(data=rows, schema=self.schema)

    pandas_frame = ResultFrame(pandas=True, **common)
    plain_frame = ResultFrame(pandas=False, **common)

    # Plain-text representation.
    pandas_text = to_str(repr(pandas_frame))
    plain_text = to_str(repr(plain_frame))
    self.assertEqual(pandas_text, plain_text)

    # HTML representation.
    pandas_html = to_str(pandas_frame._repr_html_())
    plain_html = to_str(plain_frame._repr_html_())
    self.assertEqual(pandas_html, plain_html)

    # Iteration must hand back the stored values, in order.
    iterated = [row for row in plain_frame]
    self.assertEqual(plain_frame._values, iterated)
def testLargeColumnsFormatter(self):
    """Compare the rendering of a wide ResultFrame for both ``pandas`` flags.

    The test schema is repeated ten times (column names get the repetition
    index appended) and ten records are generated for it. The ``repr`` text
    and the ``_repr_html_`` output of the ``pandas=True`` and ``pandas=False``
    frames must be equal.
    """
    # Column names: every base name suffixed with 0, then with 1, ... up to 9.
    wide_names = [
        base + str(idx)
        for idx in range(10)
        for base in self.schema.names
    ]
    wide_types = self.schema.types * 10
    wide_schema = Schema.from_lists(wide_names, wide_types)

    def make_row():
        # One wide row is the concatenation of ten random base rows.
        parts = (self._random_values().values for _ in range(10))
        return list(itertools.chain.from_iterable(parts))

    records = [
        Record(schema=df_schema_to_odps_schema(wide_schema), values=make_row())
        for _ in range(10)
    ]
    common = dict(data=records, schema=wide_schema)

    pandas_frame = ResultFrame(pandas=True, **common)
    plain_frame = ResultFrame(pandas=False, **common)

    # Plain-text representation.
    pandas_text = to_str(repr(pandas_frame))
    plain_text = to_str(repr(plain_frame))
    self.assertEqual(pandas_text, plain_text)

    # HTML representation.
    pandas_html = to_str(pandas_frame._repr_html_())
    plain_html = to_str(plain_frame._repr_html_())
    self.assertEqual(pandas_html, plain_html)
def execute(self, line, cell=''):
    """Run the SQL given on the magic line and/or cell body and return its result.

    ``line`` and ``cell`` are joined with a newline and stripped; the text is
    submitted through ``self._odps.run_sql``. While the instance is running a
    progress bar is updated once per second with the mean of the per-task
    percentages.

    Args:
        line: text following the magic name on the same line.
        cell: cell body; empty when used as a line magic.

    Returns:
        ``None`` when the combined text is empty. Otherwise, if pandas can be
        imported, a DataFrame parsed from ``reader.raw`` (or ``reader.raw``
        itself when parsing raises ``ValueError``); without pandas, a
        ``ResultFrame`` built from the reader's records.
    """
    if self._odps is None:
        self._odps = enter().odps

    sql = (line + '\n' + cell).strip()
    if not sql:
        return None

    bar = init_progress_bar()
    try:
        instance = self._odps.run_sql(sql)

        percent = 0
        while not instance.is_terminated():
            task_names = instance.get_task_names()
            last_percent = percent
            if len(task_names) > 0:
                percent = sum(
                    self._get_task_percent(instance, name)
                    for name in task_names) / len(task_names)
            else:
                percent = 0
            # Never move the bar backwards and never past 100%.
            percent = min(1, max(percent, last_percent))
            bar.update(percent)
            time.sleep(1)

        instance.wait_for_success()
        bar.update(1)

        # Only the import itself decides whether pandas is available; an
        # ImportError raised later must not be mistaken for "no pandas".
        try:
            import pandas as pd
        except ImportError:
            pd = None

        with instance.open_reader() as reader:
            if pd is None:
                return ResultFrame(list(reader), columns=reader._columns)
            try:
                return pd.read_csv(StringIO(reader.raw))
            except ValueError:
                # Result text is not CSV-parsable; hand back the raw text.
                return reader.raw
    finally:
        # Release the progress bar on success and on every failure path.
        bar.close()
def execute(self, line, cell=''):
    """Run the SQL given on the magic line and/or cell body and return its result.

    ``line`` and ``cell`` are joined with a newline and split on ``;``.
    Statements starting with ``set `` (case-insensitive) are not executed;
    each is parsed as ``key=value`` and passed to ``run_sql`` through
    ``hints``. The remaining non-empty statements are re-joined with ``;``
    and, after ``replace_sql_parameters`` has been applied with the shell's
    user namespace, submitted as one SQL text.

    Args:
        line: text following the magic name on the same line.
        cell: cell body; empty when used as a line magic.

    Returns:
        ``None`` when there is nothing to execute (empty input or only
        ``set`` statements). Otherwise, if pandas can be imported, a
        DataFrame parsed from ``reader.raw`` (or ``reader.raw`` itself when
        parsing fails); without pandas, a ``ResultFrame`` built from the
        reader's records (or ``reader.raw`` when that raises ``TypeError``).

    Raises:
        ValueError: if a ``set`` statement contains no ``=``.
    """
    self._set_odps()

    content = (line + '\n' + cell).strip()

    hints = dict()
    statements = []
    for piece in content.split(';'):
        stripped = piece.strip()
        if not stripped:
            continue
        if stripped.lower().startswith('set '):
            hint = stripped.split(' ', 1)[1]
            k, v = hint.split('=', 1)
            hints[k.strip()] = v.strip()
        else:
            # Keep the statement's original whitespace when re-joining.
            statements.append(piece)

    # Nothing but hints/blank statements: do not hand None to the
    # parameter replacement, there is simply nothing to run.
    if not statements:
        return None

    # replace user defined parameters
    sql = replace_sql_parameters(';'.join(statements), self.shell.user_ns)
    if not sql:
        return None

    bar = init_progress_bar()
    try:
        instance = self._odps.run_sql(sql, hints=hints)

        if options.verbose:
            stdout = options.verbose_log or self._to_stdout
            stdout('Instance ID: ' + instance.id)
            stdout('  Log view: ' + instance.get_logview_address())

        percent = 0
        while not instance.is_terminated():
            task_names = instance.get_task_names()
            last_percent = percent
            if len(task_names) > 0:
                percent = sum(
                    self._get_task_percent(instance, name)
                    for name in task_names) / len(task_names)
            else:
                percent = 0
            # Never move the bar backwards and never past 100%.
            percent = min(1, max(percent, last_percent))
            bar.update(percent)
            time.sleep(1)

        instance.wait_for_success()
        bar.update(1)

        # Only the pandas import itself decides which result path is used.
        try:
            import pandas as pd
        except ImportError:
            pd = None

        with instance.open_reader() as reader:
            if pd is None:
                try:
                    return ResultFrame(list(reader), columns=reader._columns)
                except TypeError:
                    return reader.raw

            # The parser error class has moved between pandas releases;
            # a missing location must not disable the pandas path.
            try:
                from pandas.errors import ParserError as parser_error
            except ImportError:
                try:
                    from pandas.parser import CParserError as parser_error
                except ImportError:
                    parser_error = ValueError

            try:
                return pd.read_csv(StringIO(reader.raw))
            except (ValueError, parser_error):
                # Result text is not CSV-parsable; hand back the raw text.
                return reader.raw
    finally:
        # Release the progress bar on success and on every failure path,
        # including errors raised while submitting or polling.
        bar.close()