class OdpsConn(object):
    """ODPS connection wrapper."""

    def __init__(self, project):
        self.access_id = ODPSCONF.key
        self.access_key = ODPSCONF.sec
        self.project = project
        self.odps = None

    def __enter__(self):
        try:
            self.odps = ODPS(self.access_id, self.access_key, self.project)
        except Exception as exc:
            raise ValueError(str(exc))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.odps = None

    def get_table_count_and_names(self):
        """Return the number of tables in the project and their names."""
        tables = self.odps.list_tables()
        names = [table.name for table in tables]
        count = len(names)
        return count, names

    def get_table_schema(self, tname):
        """Return the column names of a table."""
        table = self.odps.get_table(tname)
        _sa = table.schema
        _columns = _sa.columns
        schema = [item.name for item in _columns]
        return schema

    def execute_sql(self, sql):
        rest = []
        with self.odps.execute_sql(sql).open_reader() as reader:
            for record in reader:
                rest.append(record.values)
        return rest

    def get_table_last_update_time(self, tname):
        t = self.odps.get_table(tname)
        last_update_time = t.last_modified_time if t else None
        return last_update_time

    def count_table(self, table):
        sql = 'select count(1) from %s' % table
        with self.odps.execute_sql(sql).open_reader() as reader:
            return reader[0].values[0]
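# A minimal usage sketch of the wrapper above; "my_project" and "some_table"
# are hypothetical names, and ODPSCONF is assumed to hold valid credentials.
if __name__ == '__main__':
    with OdpsConn('my_project') as conn:
        count, names = conn.get_table_count_and_names()
        print(count, names[:5])
        print(conn.get_table_schema('some_table'))
        print(conn.count_table('some_table'))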
def get_lt_etid():
    logging.info('12. Fetching the remaining enterprises')
    print(utils.current_time(), 'Establishing ODPS connection...')
    o = ODPS('LTAIzEuNzcL6qJJ8', 'eUAgj9ijhWCvOQ3w5Uv3FkwhNxvPF2', 'database_test',
             'http://service.odps.aliyun.com/api')
    print(utils.current_time(), 'Running query...')
    pt = time.strftime('%Y%m%d', time.localtime(int(time.time() - 86400)))
    res = o.execute_sql(
        "select distinct etid from et_jobs where pt='{}' and isheadhunter=1".format(pt))
    print(utils.current_time(), 'Processing query results...')
    etid_set = set()
    conn = utils.get_local_db()
    addtime = int(time.time())
    cnt = 0
    with res.open_reader() as reader:
        print(utils.current_time(), '{} rows to process in total!'.format(reader.count))
        for record in reader:
            etid_set.add((record['etid'],))
            if len(etid_set) >= 1000:
                conn.executemany(
                    "insert into et_info_status(etid, addtime) values(%s, {}) "
                    "on duplicate key update etid=values(etid), addtime=values(addtime)"
                    .format(addtime), list(etid_set))
                cnt += 1000
                print(utils.current_time(), '{} rows written so far!'.format(cnt))
                etid_set.clear()
        if len(etid_set) > 0:
            conn.executemany(
                "insert into et_info_status(etid, addtime) values(%s, {}) "
                "on duplicate key update etid=values(etid), addtime=values(addtime)"
                .format(addtime), list(etid_set))
            cnt += len(etid_set)
            print(utils.current_time(), '{} rows written so far!'.format(cnt))
        conn.close()
        return reader.count
def load2excel(filepath, begin, end):
    o = ODPS(ACCESS_KEY_ID, ACCESS_KEY_SECRET, PROJECT,
             endpoint='http://service.odps.aliyun.com/api')
    print(o)
    sql = "select * from stat_table where ds>={begin} and ds<={end}".format(begin=begin, end=end)
    with o.execute_sql(sql).open_reader() as reader:
        print(sql)
        print(reader.count)
        table_title = reader._schema.names
        print(table_title)
        workbook = openpyxl.Workbook()
        worksheet = workbook.create_sheet('Sheet')
        for i in range(len(table_title)):
            worksheet.cell(1, i + 1, table_title[i])
        write_count = 2
        for r in reader:
            for i in range(len(r.values)):
                # write each value of the record into the current row, column by column
                worksheet.cell(write_count, i + 1, r.values[i])
            write_count = write_count + 1
    workbook.save(filename=filepath)
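# A minimal usage sketch; ACCESS_KEY_ID, ACCESS_KEY_SECRET and PROJECT are
# assumed to be defined elsewhere in this module, and the file name and ds
# range below are placeholders.
load2excel('stat_20210101_20210107.xlsx', 20210101, 20210107)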
class MaxComputeConnection(Connection):
    """MaxCompute connection; this class uses an ODPS object to establish
    a connection with MaxCompute.

    Args:
        conn_uri: uri in format:
        maxcompute://access_id:[email protected]/api?curr_project=test_ci&scheme=http
    """

    def __init__(self, conn_uri):
        super().__init__(conn_uri)
        self.params["database"] = self.params["curr_project"]
        # compose an endpoint: keep only the host and path and replace the scheme
        endpoint = self.uripts._replace(scheme=self.params["scheme"],
                                        query="",
                                        netloc=self.uripts.hostname)
        self._conn = ODPS(self.uripts.username,
                          self.uripts.password,
                          project=self.params["database"],
                          endpoint=endpoint.geturl())

    def _parse_uri(self):
        return super()._parse_uri()

    def _get_result_set(self, statement):
        try:
            instance = self._conn.execute_sql(statement)
            return MaxComputeResultSet(instance)
        except Exception as e:
            return MaxComputeResultSet(None, str(e))

    def close(self):
        if self._conn:
            self._conn = None
def createTimeButtons(self, event=None):
    start_date_select = self.cal1.selection_get().strftime("%Y%m%d")
    end_date_select = self.cal2.selection_get().strftime("%Y%m%d")
    cat1_select = self.genreCombo.get()
    cat2_select = [self.Listbox1.get(v) for v in self.Listbox1.curselection()]
    cat3_select = [self.Listbox2.get(a) for a in self.Listbox2.curselection()]
    file_name = str(self.file_name.get())
    print(start_date_select)
    print(end_date_select)
    print(cat1_select)
    print(cat2_select)
    print(cat3_select)
    print(file_name)

    # Web Hooks
    o = ODPS('', '', '', endpoint='')
    options.tunnel.endpoint = '**API URL**'
    query = """SELECT DISTINCT buyer_id
        ,email_address
        ,phone_number
        ,venture_category1_name_en
        ,venture_category2_name_en
        ,venture_category3_name_en
    FROM Table Name AS t
    WHERE venture = ""
        AND order_status_esm NOT IN ('invalid')
        AND category1_name_en in ('""" + cat1_select + """')
        AND category2_name_en in (""" + "'" + "','".join(cat2_select) + "'" + """)
        AND category3_name_en in (""" + "'" + "','".join(cat3_select) + "'" + """)
        AND TO_CHAR(t.order_create_date,'yyyymmdd') BETWEEN '""" + start_date_select + """' AND '""" + end_date_select + """'
    """

    # Execute the query and load the result into a dataframe
    df = o.execute_sql(query).open_reader().to_result_frame().to_pandas()
    df.columns = df.columns.str.upper()
    date_string = arrow.now().format('YYYYMMDD') + ' ' + file_name
    df.to_csv(str(str(date_string) + '.csv'), index=False)
    print(query)
# encoding=utf-8
from odps import ODPS
import pymysql

odps = ODPS('LTAI2wkz5kLt3205', 'RfTGzh2dfoBljs3ZZKwfpYw6OK9KYX', 'ofo')
project = odps.get_project()
print(project)
print(project.__getstate__())
t = odps.get_table('ofo_t_puser_partition')

with odps.execute_sql('select id from ofo_t_puser_partition where oauth in (1)').open_reader() as reader:
    # structured results, e.g.: odps.Record { id 90637 } odps.Record { id 90640 }
    for record in reader:
        print(record)
    # raw sql result
    print(reader)
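# pymysql is imported above but unused in this snippet; a minimal sketch of
# pushing the queried ids into a local MySQL table. The connection settings
# and the target table "puser_ids" are hypothetical placeholders.
ids = []
with odps.execute_sql('select id from ofo_t_puser_partition where oauth in (1)').open_reader() as reader:
    ids = [(record['id'],) for record in reader]

mysql_conn = pymysql.connect(host='127.0.0.1', user='root', password='***', database='test')
with mysql_conn.cursor() as cursor:
    cursor.executemany('insert into puser_ids (id) values (%s)', ids)
mysql_conn.commit()
mysql_conn.close()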
with o.execute_sql("""
select A.carno,
       A.cname As Acname, A.cvalue Acvalue,
       B.cname As Bcname, B.cvalue Bcvalue,
       C.cname As Ccname, C.cvalue Ccvalue,
       D.cname As Dcname, D.cvalue Dcvalue,
       E.cname As Ecname, E.cvalue Ecvalue,
       'location' As Qcname, location AS Qcvalue
from (select carno, 'city' as cname, city as cvalue, area
      from ofo_t_bike_location
      where dt=20171221) A
left outer join (select carno, 'order_num' as cname, 1 as cvalue
                 from ofo_t_bike_location
                 where dt=20171221 and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') B
    on A.carno = B.carno
left outer join (select carno, 'area' as cname, area as cvalue
                 from ofo_t_bike_location
                 where dt=20171221 and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') D
    on A.carno = D.carno
left outer join (select carno, 'lock_type' as cname, lock_type as cvalue
                 from ofo_t_bike_location
                 where dt=20171221 and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') E
    on A.carno = E.carno
left outer join (select carno, concat_ws(';', cast(lng as string), cast(lat as string)) As location
                 from ofo_t_bike_location
                 where dt=20171221) Q
    on A.carno = Q.carno
left outer join (
    select carno, 'silent_tag' as cname, tag as cvalue
    from (
        select t_last_day_silent.carno,
               case when t_today_order.carno is not null and t_engine_jd.bicycle_no is null then 'zr_active'
                    when t_today_order.carno is not null and t_engine_jd.bicycle_no is not null then 'engine_active'
                    when t_today_order.carno is null then 'continue_silent'
               end as tag
        from (select carno from (
                  select carno from (
                      select carno
                      from (select *, row_number() over(partition by carno order by createtime desc) rn
                            from ofo_t_order_state where dt<=20171220) a
                      where rn=1 and dt<20171218
                      union all
                      select bicycle_no as carno
                      from ofo_t_bicycle_mark_records
                      where dt=20171221 and tags like '%silence%' and status=5
                      group by bicycle_no
                  ) tmp group by carno
              ) tt
             ) t_last_day_silent
        left outer join (select carno from ofo_t_order_hg where dt=20171221 group by carno) t_today_order
            on t_last_day_silent.carno = t_today_order.carno
        left outer join (select bicycle_no from ofo_t_bicycle_mark_records
                         where dt=20171221 and status=5 and tags like '%silence%'
                         group by bicycle_no) t_engine_jd
            on t_last_day_silent.carno = t_engine_jd.bicycle_no
        union all
        select t_today_silent.carno, 'new_add_silent' as tag
        from (select carno from (
                  select carno from (
                      select carno
                      from (select *, row_number() over(partition by carno order by createtime desc) rn
                            from ofo_t_order_state where dt<=20171220) a
                      where rn=1 and dt<20171218
                      union all
                      select bicycle_no as carno
                      from ofo_t_bicycle_mark_records
                      where dt=20171221 and tags like '%silence%' and status=5
                      group by bicycle_no
                  ) tmp group by carno
              ) tt
             ) t_last_day_silent
        right outer join (select carno
                          from (select *, row_number() over(partition by carno order by createtime desc) rn
                                from ofo_t_order_hg where dt<=20171221) a
                          where rn=1 and createtime < '2017-12-19 00:00:00') t_today_silent
            on t_last_day_silent.carno = t_today_silent.carno
        where t_last_day_silent.carno is null
    ) AA
    union all
    select tt.carno, 'silent_tag' as cname, 'continue_acitve' as cvalue
    from (select carno from ofo_t_order_hg where dt>='20171218' group by carno) tt
    left outer join (
        select carno, 'silent_tag' as cname, tag as cvalue
        from (
            select t_last_day_silent.carno,
                   case when t_today_order.carno is not null and t_engine_jd.bicycle_no is null then 'zr_active'
                        when t_today_order.carno is not null and t_engine_jd.bicycle_no is not null then 'engine_active'
                        when t_today_order.carno is null then 'continue_silent'
                   end as tag
            from (select carno from (
                      select carno from (
                          select carno
                          from (select *, row_number() over(partition by carno order by createtime desc) rn
                                from ofo_t_order_state where dt<=20171220) a
                          where rn=1 and dt<20171218
                          union all
                          select bicycle_no as carno
                          from ofo_t_bicycle_mark_records
                          where dt=20171221 and tags like '%silence%' and status=5
                          group by bicycle_no
                      ) tmp group by carno
                  ) tt
                 ) t_last_day_silent
            left outer join (select carno from ofo_t_order_hg where dt=20171221 group by carno) t_today_order
                on t_last_day_silent.carno = t_today_order.carno
            left outer join (select bicycle_no from ofo_t_bicycle_mark_records
                             where dt=20171221 and tags like '%silence%' and status=5
                             group by bicycle_no) t_engine_jd
                on t_last_day_silent.carno = t_engine_jd.bicycle_no
            union all
            select t_today_silent.carno, 'new_add_silent' as tag
            from (select carno from (
                      select carno from (
                          select carno
                          from (select *, row_number() over(partition by carno order by createtime desc) rn
                                from ofo_t_order_state where dt<=20171220) a
                          where rn=1 and dt<20171218
                          union all
                          select bicycle_no as carno
                          from ofo_t_bicycle_mark_records
                          where dt=20171221 and tags like '%silence%' and status=5
                          group by bicycle_no
                      ) tmp group by carno
                  ) tt
                 ) t_last_day_silent
            right outer join (select carno
                              from (select *, row_number() over(partition by carno order by createtime desc) rn
                                    from ofo_t_order_hg where dt<=20171221) a
                              where rn=1 and createtime < '2017-12-19 00:00:00') t_today_silent
                on t_last_day_silent.carno = t_today_silent.carno
            where t_last_day_silent.carno is null
        ) AB
    ) BB
        on tt.carno = BB.carno
    where BB.carno is null
) C
    on A.carno = C.carno
left outer join (select carno from ofo_t_station_record
                 where dt<='20171221' and dt>='20171214' and record_type=1
                 group by carno) SS
    on A.carno = SS.carno
where SS.carno is null""").open_reader() as reader:
class MaxComputeConnection(Connection):
    """MaxCompute connection; this class uses an ODPS object to establish
    a connection with MaxCompute.

    Args:
        conn_uri: uri in format:
        maxcompute://access_id:[email protected]/api?curr_project=test_ci&scheme=http
    """

    def __init__(self, conn_uri):
        super(MaxComputeConnection, self).__init__(conn_uri)
        user, pwd, endpoint, proj = MaxComputeConnection.get_uri_parts(conn_uri)
        self.driver = "maxcompute"
        self.params["database"] = proj
        self.endpoint = endpoint
        self._conn = ODPS(user, pwd, project=proj, endpoint=endpoint)

    @staticmethod
    def get_uri_parts(uri):
        """Get username, password, endpoint, project from the given uri

        Args:
            uri: a valid maxcompute connection uri

        Returns:
            A tuple (username, password, endpoint, project)
        """
        uripts = urlparse(uri)
        params = parse_qs(uripts.query)
        # compose an endpoint: keep only the host and path and replace the scheme
        endpoint = uripts._replace(scheme=params.get("scheme", ["http"])[0],
                                   query="",
                                   netloc=uripts.hostname)
        endpoint = endpoint.geturl()
        return (uripts.username, uripts.password, endpoint,
                params.get("curr_project", [""])[0])

    def _get_result_set(self, statement):
        try:
            instance = self._conn.execute_sql(statement)
            return MaxComputeResultSet(instance)
        except Exception as e:
            raise e

    def close(self):
        if self._conn:
            self._conn = None

    def get_table_schema(self, table_name):
        schema = self._conn.get_table(table_name).schema
        return [(c.name, str(c.type).upper()) for c in schema.columns]

    def persist_table(self, table):
        sql = "ALTER TABLE %s DISABLE LIFECYCLE;" % table
        self.execute(sql)

    def write_table(self, table_name, rows, compress_option=COMPRESS_ODPS_ZLIB):
        """Append rows to the given table, this is a driver specific api

        Args:
            table_name: the table to write
            rows: list of rows, each row is a data tuple,
                like [(1, True, "ok"), (2, False, "bad")]
            compress_option: the compress option defined in
                tunnel.CompressOption.CompressAlgorithm
        """
        self._conn.write_table(table_name,
                               rows,
                               compress_option=compress_option)
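# A minimal sketch of how get_uri_parts splits a connection URI; the host
# "service.example.com" and the credentials below are placeholders, not real
# MaxCompute values.
_example_uri = ("maxcompute://my_access_id:my_access_key"
                "@service.example.com/api?curr_project=test_ci&scheme=http")
_user, _pwd, _endpoint, _proj = MaxComputeConnection.get_uri_parts(_example_uri)
assert _endpoint == "http://service.example.com/api"
assert _proj == "test_ci"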
class myOdps:

    # Initialize an ODPS connection object
    def __init__(self, access_id, secret_access_key, project):
        self.odps = ODPS(access_id=access_id,
                         secret_access_key=secret_access_key,
                         project=project,
                         end_point="http://service.odps.aliyun.com/api")

    # Get all table names
    def get_all_tabel(self):
        # return: all table names
        table_name = []
        for table in self.odps.list_tables():
            table_name.append(table.name)
        return table_name

    # Create a table
    def creat_table(self, table_name, columns=None, if_not_exists=True):
        # table_name: table name
        # columns: ('num bigint, num2 double', 'pt string') tuple of columns and partitions
        # if_not_exists: True -> only create the table when it does not exist
        # lifecycle: 28 -> table lifecycle
        # return: the table object
        try:
            return self.odps.create_table(table_name,
                                          columns,
                                          if_not_exists=if_not_exists)
        except Exception:
            return self.odps.get_table(table_name)

    # Get a table directly by name
    def get_a_table(self, table_name):
        # table_name: table name
        # return: the table object
        return self.odps.get_table(table_name)

    # Drop a table
    def drop_a_table(self, table_name):
        # table_name: table name
        # return: result of dropping the table
        return self.odps.delete_table(table_name)

    # Get all partitions of a table
    def get_partitions(self, table):
        # table: a table object
        # return: all partitions of the table
        partitions = []
        for partition in table.partitions:
            partitions.append(partition.name)
        return partitions

    # ============= data upload ============
    # Upload a csv to odps and create the table; the csv must have a header row
    def uploadCSV(self, csvFilename, tableName, sep=",", pt=None):
        """
        :param csvFilename: path of the local csv, which must have a header row
        :param tableName: table name to use when uploading to odps
        :param sep: csv delimiter
        :param pt: whether to create a partition
        """
        print("start upload ...\n")
        df = pd.read_csv(csvFilename, sep=sep)
        shape0 = df.shape[0]
        columns = [
            Column(name=f"{x}", type='string', comment='the column')
            for x in df.columns
        ]
        if pt:
            partitions = [
                Partition(name='pt', type='string', comment='the partition')
            ]
            schema = Schema(columns=columns, partitions=partitions)
            table = self.creat_table(tableName, schema)
            table.create_partition(f"pt={pt}", if_not_exists=True)
            table_columns = [i.name for i in table.schema.columns]
            with table.open_writer(partition=f"pt={pt}") as writer:
                for index in df.index:
                    print(f"{index+1}/{shape0} in {tableName} ...")
                    item_dict = dict(df.loc[index])
                    item = []
                    for field in table_columns[:-1]:
                        item.append(item_dict.get(field, ''))
                    item.append(pt)
                    writer.write(item)
        else:
            schema = Schema(columns=columns)
            table = self.creat_table(tableName, schema)
            table_columns = [i.name for i in table.schema.columns]
            with table.open_writer(partition=None) as writer:
                for index in df.index:
                    print(f"{index+1}/{shape0} in {tableName} ...")
                    item_dict = dict(df.loc[index])
                    item = []
                    for field in table_columns[:-1]:
                        item.append(item_dict.get(field, ''))
                    writer.write(item)
        print("\n\n upload finish ...")

    # Download while uploading: the row field at the given index holds a URL whose
    # content is fetched and written back into the row before the full row is uploaded
    def downloaAndUp(self, csvFilename, tableName, sep=",", urlIndex=1, pt=None):
        """
        :param csvFilename: path of the local csv, which must have a header row
        :param tableName: table name to use when uploading to odps
        :param sep: csv delimiter
        :param urlIndex: index of the url field
        """
        print("start upload ...\n")
        f = open(csvFilename, encoding='utf-8')
        first_line = f.readlines(1)[0].strip('\n').split(sep)
        columns = [
            Column(name=f"{x}", type='string', comment='the column')
            for x in first_line
        ]
        if pt:
            partitions = [
                Partition(name='pt', type='string', comment='the partition')
            ]
            schema = Schema(columns=columns, partitions=partitions)
            table = self.creat_table(tableName, schema)
            table.create_partition(f"pt={pt}", if_not_exists=True)
            with table.open_writer(partition=f"pt={pt}") as writer:
                for index, line in enumerate(f):
                    print(f"{index} in {tableName} ...")
                    item = line.strip('\n').split(sep)
                    item.append(pt)
                    resp = download(item[urlIndex])
                    data = resp.text
                    if sys.getsizeof(data) <= 8 * 1024 * 1000:
                        item[urlIndex] = data
                    else:
                        print(f"failed in {item[0]}")
                    writer.write(item)
        else:
            schema = Schema(columns=columns)
            table = self.creat_table(tableName, schema)
            with table.open_writer(partition=None) as writer:
                for index, line in enumerate(f):
                    print(f"{index} in {tableName} ...")
                    item = line.strip('\n').split(sep)
                    resp = download(item[urlIndex])
                    data = resp.text
                    if sys.getsizeof(data) <= 8 * 1024 * 1000:
                        item[urlIndex] = data
                    else:
                        print(f"failed in {item[0]}")
                    writer.write(item)
        print("\n\n upload finish ...")
        f.close()

    # =========== run sql =========
    # sql query
    def select_sql(self, sql):
        # return: an iterator over the query result
        with self.odps.execute_sql(sql).open_reader() as reader:
            return reader
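# A minimal usage sketch of the class above; the credentials, project, csv path
# and table name are placeholders, not real values.
if __name__ == '__main__':
    client = myOdps('<access_id>', '<secret_access_key>', '<project>')
    print(client.get_all_tabel())
    client.uploadCSV('./data.csv', 'demo_upload_table', pt='20200101')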
# the access id / key below are placeholders; the original values are not shown
odps = ODPS('<access_id>', '<access_key>',
            'GyyStatistical',
            endpoint='https://service.odps.aliyun.com/api')

# Get a specific project
project = odps.get_project('GyyStatistical')
# Get the default project
# project = odps.get_project()

# List all tables in the project
print('---- list all tables in the project: start ----')
for table in odps.list_tables():
    print(table)
print('---- list all tables in the project: end ----\n')

print('---- run SQL synchronously: start ----')
instance = odps.execute_sql('select * from ots_arealist1')
with instance.open_reader() as reader:
    for record in reader:
        print(record)
        print(type(record))
print('---- run SQL synchronously: end ----\n')

# print('---- run SQL asynchronously: start ----')
# instance = odps.run_sql('select * from ots_arealist')
# instance.wait_for_success()
# with instance.open_reader() as reader:
#     for record in reader:
#         print(record)
# print('---- run SQL asynchronously: end ----\n')
#
#
# # Get a handle to a specific table
class ODPSIOTest(unittest.TestCase):
    def setUp(self):
        self._project = os.environ[ODPSConfig.PROJECT_NAME]
        self._access_id = os.environ[ODPSConfig.ACCESS_ID]
        self._access_key = os.environ[ODPSConfig.ACCESS_KEY]
        self._endpoint = os.environ.get(ODPSConfig.ENDPOINT)
        self._test_read_table = "test_odps_reader_%d_%d" % (
            int(time.time()),
            random.randint(1, 101),
        )
        self._test_write_table = "test_odps_writer_%d_%d" % (
            int(time.time()),
            random.randint(1, 101),
        )
        self._odps_client = ODPS(self._access_id, self._access_key,
                                 self._project, self._endpoint)
        self.create_iris_odps_table()

    def test_read_to_iterator(self):
        reader = ODPSReader(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_read_table,
            None,
            4,
            None,
        )
        records_iter = reader.to_iterator(1, 0, 50, 2, False, None)
        records = list(records_iter)
        self.assertEqual(len(records), 6,
                         "Unexpected number of batches: %d" % len(records))
        flattened_records = [record for batch in records for record in batch]
        self.assertEqual(
            len(flattened_records),
            220,
            "Unexpected number of total records: %d" % len(flattened_records),
        )

    def test_write_odps_to_recordio_shards_from_iterator(self):
        reader = ODPSReader(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_read_table,
            None,
            4,
            None,
        )
        records_iter = reader.to_iterator(1, 0, 50, 2, False, None)
        with tempfile.TemporaryDirectory() as output_dir:
            write_recordio_shards_from_iterator(
                records_iter,
                ["f" + str(i) for i in range(5)],
                output_dir,
                records_per_shard=50,
            )
            self.assertEqual(len(os.listdir(output_dir)), 5)

    def test_write_from_iterator(self):
        columns = ["num", "num2"]
        column_types = ["bigint", "double"]

        # If the table doesn't exist yet
        writer = ODPSWriter(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_write_table,
            columns,
            column_types,
        )
        writer.from_iterator(iter([[1, 0.5], [2, 0.6]]), 2)
        table = self._odps_client.get_table(self._test_write_table,
                                            self._project)
        self.assertEqual(table.schema.names, columns)
        self.assertEqual(table.schema.types, column_types)
        self.assertEqual(table.to_df().count(), 1)

        # If the table already exists
        writer = ODPSWriter(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_write_table,
        )
        writer.from_iterator(iter([[1, 0.5], [2, 0.6]]), 2)
        table = self._odps_client.get_table(self._test_write_table,
                                            self._project)
        self.assertEqual(table.schema.names, columns)
        self.assertEqual(table.schema.types, column_types)
        self.assertEqual(table.to_df().count(), 2)

    def create_iris_odps_table(self):
        sql_tmpl = """
        DROP TABLE IF EXISTS {PROJECT_NAME}.{TABLE_NAME};
        CREATE TABLE {PROJECT_NAME}.{TABLE_NAME} (
            sepal_length DOUBLE,
            sepal_width DOUBLE,
            petal_length DOUBLE,
            petal_width DOUBLE,
            class BIGINT);

        INSERT INTO {PROJECT_NAME}.{TABLE_NAME} VALUES
        (6.4,2.8,5.6,2.2,2), (5.0,2.3,3.3,1.0,1), (4.9,2.5,4.5,1.7,2), (4.9,3.1,1.5,0.1,0),
        (5.7,3.8,1.7,0.3,0), (4.4,3.2,1.3,0.2,0), (5.4,3.4,1.5,0.4,0), (6.9,3.1,5.1,2.3,2),
        (6.7,3.1,4.4,1.4,1), (5.1,3.7,1.5,0.4,0), (5.2,2.7,3.9,1.4,1), (6.9,3.1,4.9,1.5,1),
        (5.8,4.0,1.2,0.2,0), (5.4,3.9,1.7,0.4,0), (7.7,3.8,6.7,2.2,2), (6.3,3.3,4.7,1.6,1),
        (6.8,3.2,5.9,2.3,2), (7.6,3.0,6.6,2.1,2), (6.4,3.2,5.3,2.3,2), (5.7,4.4,1.5,0.4,0),
        (6.7,3.3,5.7,2.1,2), (6.4,2.8,5.6,2.1,2), (5.4,3.9,1.3,0.4,0), (6.1,2.6,5.6,1.4,2),
        (7.2,3.0,5.8,1.6,2), (5.2,3.5,1.5,0.2,0), (5.8,2.6,4.0,1.2,1), (5.9,3.0,5.1,1.8,2),
        (5.4,3.0,4.5,1.5,1), (6.7,3.0,5.0,1.7,1), (6.3,2.3,4.4,1.3,1), (5.1,2.5,3.0,1.1,1),
        (6.4,3.2,4.5,1.5,1), (6.8,3.0,5.5,2.1,2), (6.2,2.8,4.8,1.8,2), (6.9,3.2,5.7,2.3,2),
        (6.5,3.2,5.1,2.0,2), (5.8,2.8,5.1,2.4,2), (5.1,3.8,1.5,0.3,0), (4.8,3.0,1.4,0.3,0),
        (7.9,3.8,6.4,2.0,2), (5.8,2.7,5.1,1.9,2), (6.7,3.0,5.2,2.3,2), (5.1,3.8,1.9,0.4,0),
        (4.7,3.2,1.6,0.2,0), (6.0,2.2,5.0,1.5,2), (4.8,3.4,1.6,0.2,0), (7.7,2.6,6.9,2.3,2),
        (4.6,3.6,1.0,0.2,0), (7.2,3.2,6.0,1.8,2), (5.0,3.3,1.4,0.2,0), (6.6,3.0,4.4,1.4,1),
        (6.1,2.8,4.0,1.3,1), (5.0,3.2,1.2,0.2,0), (7.0,3.2,4.7,1.4,1), (6.0,3.0,4.8,1.8,2),
        (7.4,2.8,6.1,1.9,2), (5.8,2.7,5.1,1.9,2), (6.2,3.4,5.4,2.3,2), (5.0,2.0,3.5,1.0,1),
        (5.6,2.5,3.9,1.1,1), (6.7,3.1,5.6,2.4,2), (6.3,2.5,5.0,1.9,2), (6.4,3.1,5.5,1.8,2),
        (6.2,2.2,4.5,1.5,1), (7.3,2.9,6.3,1.8,2), (4.4,3.0,1.3,0.2,0), (7.2,3.6,6.1,2.5,2),
        (6.5,3.0,5.5,1.8,2), (5.0,3.4,1.5,0.2,0), (4.7,3.2,1.3,0.2,0), (6.6,2.9,4.6,1.3,1),
        (5.5,3.5,1.3,0.2,0), (7.7,3.0,6.1,2.3,2), (6.1,3.0,4.9,1.8,2), (4.9,3.1,1.5,0.1,0),
        (5.5,2.4,3.8,1.1,1), (5.7,2.9,4.2,1.3,1), (6.0,2.9,4.5,1.5,1), (6.4,2.7,5.3,1.9,2),
        (5.4,3.7,1.5,0.2,0), (6.1,2.9,4.7,1.4,1), (6.5,2.8,4.6,1.5,1), (5.6,2.7,4.2,1.3,1),
        (6.3,3.4,5.6,2.4,2), (4.9,3.1,1.5,0.1,0), (6.8,2.8,4.8,1.4,1), (5.7,2.8,4.5,1.3,1),
        (6.0,2.7,5.1,1.6,1), (5.0,3.5,1.3,0.3,0), (6.5,3.0,5.2,2.0,2), (6.1,2.8,4.7,1.2,1),
        (5.1,3.5,1.4,0.3,0), (4.6,3.1,1.5,0.2,0), (6.5,3.0,5.8,2.2,2), (4.6,3.4,1.4,0.3,0),
        (4.6,3.2,1.4,0.2,0), (7.7,2.8,6.7,2.0,2), (5.9,3.2,4.8,1.8,1), (5.1,3.8,1.6,0.2,0),
        (4.9,3.0,1.4,0.2,0), (4.9,2.4,3.3,1.0,1), (4.5,2.3,1.3,0.3,0), (5.8,2.7,4.1,1.0,1),
        (5.0,3.4,1.6,0.4,0), (5.2,3.4,1.4,0.2,0), (5.3,3.7,1.5,0.2,0), (5.0,3.6,1.4,0.2,0),
        (5.6,2.9,3.6,1.3,1), (4.8,3.1,1.6,0.2,0);
        """
        self._odps_client.execute_sql(
            sql_tmpl.format(PROJECT_NAME=self._project,
                            TABLE_NAME=self._test_read_table),
            hints={"odps.sql.submit.mode": "script"},
        )

    def tearDown(self):
        self._odps_client.delete_table(self._test_write_table,
                                       self._project,
                                       if_exists=True)
        self._odps_client.delete_table(self._test_read_table,
                                       self._project,
                                       if_exists=True)
sql = '''
SELECT plan_id, count(id) group_count
FROM mysql_t_group_dim
where ds='${bdp.system.bizdate}'
group by plan_id
order by group_count desc
limit 10
'''

import datetime

ds = (datetime.datetime.now() + datetime.timedelta(days=-1)).strftime('%Y%m%d')
print(ds)
sql = sql.replace('${bdp.system.bizdate}', ds)

instance_id = o.execute_sql(sql)
reader = instance_id.open_reader()
print(reader.count)
print(reader._schema)

# process row by row
for row in reader:
    # print(row.values)
    print(dict(zip(('plan', 'groupcount'), row.values)))

# process everything at once
result_df = reader.to_pandas()
print(result_df)