def list_stores(self, line):
    """Return ``display()`` of the object produced by the module-level ``enter``.

    ``line`` is stripped first; when anything remains it is passed to
    ``enter`` as its single argument, otherwise ``enter`` is called with no
    arguments.
    """
    name = line.strip()
    target = enter(name) if name else enter()
    return target.display()
def enter(self, line):
    """Call the global ``enter`` helper and remember its ``odps`` attribute.

    ``line`` is stripped; a non-empty result is forwarded to ``enter`` as its
    only argument, an empty one means ``enter`` is called without arguments.
    The ``odps`` attribute of the returned object is stored on
    ``self._odps`` and the object itself is returned.
    """
    # NOTE(review): the inner ``enter`` is presumably a module-level function
    # that this method (of the same name) wraps -- confirm against the imports.
    name = line.strip()
    session = enter(name) if name else enter()
    self._odps = session.odps
    return session
def enter(self, line):
    """Call the global ``enter`` helper, remember its ``odps`` and expose it as ``o``.

    ``line`` is stripped; a non-empty result is forwarded to ``enter`` as its
    only argument, an empty one means ``enter`` is called without arguments.
    The ``odps`` attribute of the returned object is stored on ``self._odps``.
    If the shell's user namespace has no ``'o'`` entry yet, ``self._odps`` is
    published there; an existing ``'o'`` is left untouched.  The object
    returned by ``enter`` is returned to the caller.
    """
    # NOTE(review): the inner ``enter`` is presumably a module-level function
    # that this method (of the same name) wraps -- confirm against the imports.
    name = line.strip()
    call_args = (name,) if name else ()
    session = enter(*call_args)
    self._odps = session.odps

    # Never overwrite a user-defined ``o``.
    if 'o' in self.shell.user_ns:
        return session
    self.shell.user_ns['o'] = self._odps
    return session
def _set_odps(self): if self._odps is not None: return if options.access_id is not None and \ options.access_key is not None and \ options.default_project is not None: self._odps = ODPS( options.access_id, options.access_key, options.default_project, endpoint=options.end_point, tunnel_endpoint=options.tunnel_endpoint ) else: self._odps = enter().odps
def test_construct(self):
    """Check that ``RunnerContext`` ends up with an ODPS object equal to ``self.odps``.

    The comparison is done on a tuple of identifying fields.  In secret mode
    the secret access key is left out of that tuple.  Outside secret mode the
    test additionally sets up the ``TEST_CONTEXT_ROOM`` room, enters it, and
    checks a ``RunnerContext`` built without arguments.
    """
    if utils.is_secret_mode():
        def describe(o):
            return (o.account.access_id, o.project, o.endpoint)
    else:
        def describe(o):
            return (o.account.access_id, o.account.secret_access_key,
                    o.project, o.endpoint)

    # Context constructed from an explicit ODPS object.
    explicit_ctx = RunnerContext(self.odps)
    self.assertTupleEqual(describe(self.odps), describe(explicit_ctx._odps))

    # NOTE(review): the room-based part is assumed to run only outside secret
    # mode, since ``inter.setup`` is fed the four-element tuple -- confirm.
    if not utils.is_secret_mode():
        inter.teardown(TEST_CONTEXT_ROOM)
        inter.setup(*describe(self.odps), room=TEST_CONTEXT_ROOM)
        inter.enter(TEST_CONTEXT_ROOM)

        # Context constructed without arguments.
        implicit_ctx = RunnerContext()
        self.assertTupleEqual(describe(self.odps), describe(implicit_ctx._odps))
        inter.teardown(TEST_CONTEXT_ROOM)
def persist(self, line):
    """Write a pandas DataFrame from the shell's user namespace to a new table.

    ``line`` has the form ``<frame_name> <table_name>`` or
    ``<frame_name> <project>.<table_name>``.  Surrounding whitespace and
    semicolons are ignored.

    Column types are looked up in ``np_to_odps_types`` by dtype, falling back
    to ``odps_types.string``.  Rows are streamed to the table writer while a
    progress bar is updated every 50 rows.

    Raises:
        ValueError: ``line`` does not contain both a frame and a table name.
        KeyError: ``frame_name`` is not in the shell's user namespace.
        TypeError: the named object is not a pandas DataFrame, or the target
            table already exists.
    """
    import pandas as pd

    if self._odps is None:
        self._odps = enter().odps

    # The final strip() removes whitespace left in front of a trailing ';',
    # which would otherwise end up in the table name.
    parts = line.strip().strip(';').strip().split(None, 1)
    if len(parts) != 2:
        raise ValueError(
            'expected "<frame_name> [<project>.]<table_name>", got %r' % line)
    frame_name, table_name = parts

    if '.' in table_name:
        project_name, table_name = table_name.split('.', 1)
    else:
        project_name = None

    frame = self.shell.user_ns[frame_name]
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('%s is not a Pandas DataFrame' % frame_name)

    columns = list(frame.columns)
    types = [np_to_odps_types.get(tp, odps_types.string) for tp in frame.dtypes]

    if self._odps.exist_table(table_name, project=project_name):
        raise TypeError('%s already exists' % table_name)

    # Create the table in the same project that was checked above.
    tb = self._odps.create_table(
        table_name, Schema.from_lists(columns, types), project=project_name)

    def gen(df):
        size = len(df)
        bar = init_progress_bar(size)
        try:
            for i, row in enumerate(df.values):
                if i % 50 == 0:
                    bar.update(min(i, size))
                yield tb.new_record(list(row))
            bar.update(size)
        finally:
            # Runs on normal completion and when the writer aborts iteration.
            bar.close()

    with tb.open_writer() as writer:
        writer.write(gen(frame))
def persist(self, line):
    """Write a pandas DataFrame from the shell's user namespace to a new table.

    ``line`` has the form ``<frame_name> <table_name>`` or
    ``<frame_name> <project>.<table_name>``.  Surrounding whitespace and
    semicolons are ignored.

    Column types are looked up in ``np_to_odps_types`` by dtype, falling back
    to ``odps_types.string``.  Rows are streamed to the table writer while a
    progress bar is updated every 50 rows.

    Raises:
        ValueError: ``line`` does not contain both a frame and a table name.
        KeyError: ``frame_name`` is not in the shell's user namespace.
        TypeError: the named object is not a pandas DataFrame, or the target
            table already exists.
    """
    import pandas as pd

    if self._odps is None:
        self._odps = enter().odps

    # The final strip() removes whitespace left in front of a trailing ';',
    # which would otherwise end up in the table name.
    parts = line.strip().strip(';').strip().split(None, 1)
    if len(parts) != 2:
        raise ValueError(
            'expected "<frame_name> [<project>.]<table_name>", got %r' % line)
    frame_name, table_name = parts

    if '.' in table_name:
        project_name, table_name = table_name.split('.', 1)
    else:
        project_name = None

    frame = self.shell.user_ns[frame_name]
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('%s is not a Pandas DataFrame' % frame_name)

    columns = list(frame.columns)
    types = [np_to_odps_types.get(tp, odps_types.string) for tp in frame.dtypes]

    if self._odps.exist_table(table_name, project=project_name):
        raise TypeError('%s already exists' % table_name)

    # Create the table in the same project that was checked above.
    tb = self._odps.create_table(
        table_name, Schema.from_lists(columns, types), project=project_name)

    def gen(df):
        size = len(df)
        bar = init_progress_bar(size)
        try:
            for i, row in enumerate(df.values):
                if i % 50 == 0:
                    bar.update(min(i, size))
                yield tb.new_record(list(row))
            bar.update(size)
        finally:
            # Runs on normal completion and when the writer aborts iteration.
            bar.close()

    with tb.open_writer() as writer:
        writer.write(gen(frame))
def execute(self, line, cell=''):
    """Run the SQL given by ``line`` and ``cell`` and return its result.

    ``line`` and ``cell`` are joined with a newline and stripped.  While the
    instance is running, a progress bar shows the mean of
    ``self._get_task_percent`` over the instance's tasks, clamped to at most
    1 and never moving backwards.

    Returns:
        ``None`` when the combined SQL text is empty.  Otherwise the reader's
        raw text parsed by ``pandas.read_csv``; the raw text itself when that
        parse raises ``ValueError``; or a ``ResultFrame`` built from the
        reader's records when an ``ImportError`` is raised (e.g. pandas is
        not installed).
    """
    if self._odps is None:
        self._odps = enter().odps

    sql = (line + '\n' + cell).strip()
    if not sql:
        return None

    bar = init_progress_bar()
    try:
        instance = self._odps.run_sql(sql)

        percent = 0
        while not instance.is_terminated():
            task_names = instance.get_task_names()
            last_percent = percent

            if len(task_names) > 0:
                percent = sum(self._get_task_percent(instance, name)
                              for name in task_names) / len(task_names)
            else:
                percent = 0

            # Never let the bar move backwards or overshoot 100%.
            percent = min(1, max(percent, last_percent))
            bar.update(percent)

            time.sleep(1)

        instance.wait_for_success()
        bar.update(1)

        with instance.open_reader() as reader:
            try:
                import pandas as pd

                try:
                    return pd.read_csv(StringIO(reader.raw))
                except ValueError:
                    return reader.raw
            except ImportError:
                return ResultFrame(list(reader), columns=reader._columns)
    finally:
        # Release the bar on success and on failure alike.
        bar.close()
def execute(self, line, cell=''):
    """Run the SQL given by ``line`` and ``cell`` and return its result.

    ``line`` and ``cell`` are joined with a newline and stripped.  While the
    instance is running, a progress bar shows the mean of
    ``self._get_task_percent`` over the instance's tasks, clamped to at most
    1 and never moving backwards.

    Returns:
        ``None`` when the combined SQL text is empty.  Otherwise the reader's
        raw text parsed by ``pandas.read_csv``; the raw text itself when that
        parse raises ``ValueError``; or a ``ResultFrame`` built from the
        reader's records when an ``ImportError`` is raised (e.g. pandas is
        not installed).
    """
    if self._odps is None:
        self._odps = enter().odps

    sql = (line + '\n' + cell).strip()
    if not sql:
        return None

    bar = init_progress_bar()
    try:
        instance = self._odps.run_sql(sql)

        percent = 0
        while not instance.is_terminated():
            task_names = instance.get_task_names()
            last_percent = percent

            if len(task_names) > 0:
                percent = sum(self._get_task_percent(instance, name)
                              for name in task_names) / len(task_names)
            else:
                percent = 0

            # Never let the bar move backwards or overshoot 100%.
            percent = min(1, max(percent, last_percent))
            bar.update(percent)

            time.sleep(1)

        instance.wait_for_success()
        bar.update(1)

        with instance.open_reader() as reader:
            try:
                import pandas as pd

                try:
                    return pd.read_csv(StringIO(reader.raw))
                except ValueError:
                    return reader.raw
            except ImportError:
                return ResultFrame(list(reader), columns=reader._columns)
    finally:
        # Release the bar on success and on failure alike.
        bar.close()