Example 1
class OdpsConn(object):
    """
    odps 连接
    """
    def __init__(self, project):
        self.access_id = ODPSCONF.key
        self.access_key = ODPSCONF.sec
        self.project = project

        self.odps = None

    def __enter__(self):
        try:
            self.odps = ODPS(self.access_id, self.access_key, self.project)
        except Exception as exc:
            # Exception objects have no .message attribute in Python 3
            raise ValueError(str(exc))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # `del self` only unbinds the local name; drop the connection reference instead
        self.odps = None

    def get_table_count_and_names(self):
        """
        获取一个项目下的table的数量和table的名字
        :return:
        """
        tables = self.odps.list_tables()
        names = [table.name for table in tables]
        count = len(names)
        return count, names

    def get_table_schema(self, tname):
        """
        获取表字段
        :return: 
        """
        table = self.odps.get_table(tname)
        _sa = table.schema
        _columns = _sa.columns
        schema = [item.name for item in _columns]
        return schema

    def execute_sql(self, sql):
        rest = []
        with self.odps.execute_sql(sql).open_reader() as reader:
            for record in reader:
                rest.append(record.values)
        return rest

    def get_table_last_update_time(self, tname):
        t = self.odps.get_table(tname)
        last_update_time = t.last_modified_time if t else None
        return last_update_time

    def count_table(self, table):
        sql = 'select count(1) from %s' % table
        with self.odps.execute_sql(sql).open_reader() as reader:
            return reader[0].values[0]
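
A minimal usage sketch for the wrapper above, assuming ODPSCONF holds valid credentials and that 'my_table' exists in the project (both names are illustrative, not from the source):

with OdpsConn('my_project') as conn:
    count, names = conn.get_table_count_and_names()
    print(count, names)
    if 'my_table' in names:
        print(conn.get_table_schema('my_table'))
        print(conn.count_table('my_table'))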
Example 2
def get_lt_etid():
    logging.info('12. Fetching the remaining companies')
    print(utils.current_time(), 'Opening ODPS connection...')
    o = ODPS('LTAIzEuNzcL6qJJ8', 'eUAgj9ijhWCvOQ3w5Uv3FkwhNxvPF2',
             'database_test', 'http://service.odps.aliyun.com/api')
    print(utils.current_time(), 'Running query...')
    pt = time.strftime('%Y%m%d', time.localtime(int(time.time() - 86400)))
    res = o.execute_sql(
        "select distinct etid from et_jobs where pt='{}' and isheadhunter=1".
        format(pt))
    print(utils.current_time(), 'Processing query results...')
    etid_set = set()
    conn = utils.get_local_db()
    addtime = int(time.time())
    cnt = 0
    with res.open_reader() as reader:
        total = reader.count
        print(utils.current_time(), '{} rows to process.'.format(total))
        for record in reader:
            etid_set.add((record['etid'], ))
            if len(etid_set) >= 1000:
                conn.executemany(
                    "insert into et_info_status(etid,addtime) values(%s,{}) on duplicate key update etid=values(etid), addtime=values(addtime)"
                    .format(addtime), list(etid_set))
                cnt += 1000
                print(utils.current_time(), '{} rows written so far.'.format(cnt))
                etid_set.clear()
    if len(etid_set) > 0:
        conn.executemany(
            "insert into et_info_status(etid,addtime) values(%s,{}) on duplicate key update etid=values(etid), addtime=values(addtime)"
            .format(addtime), list(etid_set))
        cnt += len(etid_set)
        print(utils.current_time(), '{} rows written so far.'.format(cnt))
    conn.close()
    # the reader is closed once the with-block exits, so return the count captured above
    return total

Example 3
def load2excel(filepath, begin, end):
    o = ODPS(ACCESS_KEY_ID,
             ACCESS_KEY_SECRET,
             PROJECT,
             endpoint='http://service.odps.aliyun.com/api')

    print(o)

    sql = "select * from stat_table where ds>={begin} and ds<={end}".format(begin=begin, end=end)
    with o.execute_sql(sql).open_reader() as reader:
        print(sql)
        print(reader.count)
        table_title = reader._schema.names  # _schema is a private PyODPS attribute
        print(table_title)

        workbook = openpyxl.Workbook()
        # use the default sheet; create_sheet('Sheet') would leave an empty extra sheet
        worksheet = workbook.active
        worksheet.title = 'Sheet'
        for i in range(len(table_title)):
            worksheet.cell(1, i + 1, table_title[i])

        write_count = 2
        for r in reader:
            for i in range(len(r.values)):  # write each of the row's values into the current excel row
                worksheet.cell(write_count, i + 1, r.values[i])
            write_count = write_count + 1
        workbook.save(filename=filepath)
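
A hedged call sketch for load2excel, assuming ds holds yyyymmdd-style partition values and that ACCESS_KEY_ID, ACCESS_KEY_SECRET, and PROJECT are defined at module level (the dates are illustrative):

load2excel('stat_table_export.xlsx', 20230101, 20230107)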
Example 4
class MaxComputeConnection(Connection):
    """MaxCompute connection, this class uses ODPS object to establish
    connection with maxcompute

    Args:
        conn_uri: uri in format:
        maxcompute://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http
    """
    def __init__(self, conn_uri):
        super().__init__(conn_uri)
        self.params["database"] = self.params["curr_project"]
        # compose an endpoint, only keep the host and path and replace scheme
        endpoint = self.uripts._replace(scheme=self.params["scheme"],
                                        query="",
                                        netloc=self.uripts.hostname)
        self._conn = ODPS(self.uripts.username,
                          self.uripts.password,
                          project=self.params["database"],
                          endpoint=endpoint.geturl())

    def _parse_uri(self):
        return super()._parse_uri()

    def _get_result_set(self, statement):
        try:
            instance = self._conn.execute_sql(statement)
            return MaxComputeResultSet(instance)
        except Exception as e:
            return MaxComputeResultSet(None, str(e))

    def close(self):
        if self._conn:
            self._conn = None
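
A minimal construction sketch, assuming the Connection base class and MaxComputeResultSet wrapper from the surrounding project (referenced above but not shown); the credentials and project are placeholders:

conn = MaxComputeConnection(
    'maxcompute://my_id:my_key@service.odps.aliyun.com/api'
    '?curr_project=test_ci&scheme=http')
result_set = conn._get_result_set('select 1;')
conn.close()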
Example 5

    def createTimeButtons(self, event=None):
        start_date_select = self.cal1.selection_get().strftime("%Y%m%d")
        end_date_select = self.cal2.selection_get().strftime("%Y%m%d")
        cat1_select = self.genreCombo.get()
        cat2_select = [
            self.Listbox1.get(v) for v in self.Listbox1.curselection()
        ]
        cat3_select = [
            self.Listbox2.get(a) for a in self.Listbox2.curselection()
        ]
        file_name = str(self.file_name.get())

        print(start_date_select)
        print(end_date_select)
        print(cat1_select)
        print(cat2_select)
        print(cat3_select)
        print(file_name)

        # Web Hooks

        o = ODPS('', '', '', endpoint='')
        options.tunnel.endpoint = '**API URL**'
        query = """SELECT  DISTINCT buyer_id
                        ,email_address
                        ,phone_number
                        ,venture_category1_name_en
                        ,venture_category2_name_en
                        ,venture_category3_name_en
                  FROM   Table Name AS t
                  WHERE  venture = ""
                  AND    order_status_esm NOT IN ('invalid')
                  AND    category1_name_en in ('""" + cat1_select + """')
                  AND    category2_name_en in (""" + "'" + "','".join(
            cat2_select) + "'" + """)
                  AND    category3_name_en in (""" + "'" + "','".join(
                cat3_select) + "'" + """)
                  AND    TO_CHAR(t.order_create_date,'yyyymmdd') BETWEEN '""" + start_date_select + """'
                  AND    '""" + end_date_select + """'
                  """
        # Executing the query and set the data into a dataframe
        df = o.execute_sql(query).open_reader().to_result_frame().to_pandas()
        df.columns = df.columns.str.upper()
        date_string = arrow.now().format('YYYYMMDD') + ' ' + file_name
        df.to_csv(date_string + '.csv', index=False)
        print(query)
Example 6
# encoding=utf-8
from odps import ODPS
import pymysql

odps = ODPS('LTAI2wkz5kLt3205','RfTGzh2dfoBljs3ZZKwfpYw6OK9KYX','ofo')
project = odps.get_project()
print(project)

print(project.__getstate__())
t = odps.get_table('ofo_t_puser_partition')


with odps.execute_sql('select id from ofo_t_puser_partition where oauth in (1)').open_reader() as reader:

    # structured results look like:
    #
    #   odps.Record {
    #     id  90637
    #   }
    #   odps.Record {
    #     id  90640
    #   }

    for record in reader:  # iterate over the structured records
        print(record)

    # print the raw sql result reader
    print(reader)
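
Records can also be read by column name or position; a small variation on the loop above, assuming the same id column:

with odps.execute_sql('select id from ofo_t_puser_partition where oauth in (1)').open_reader() as reader:
    for record in reader:
        print(record['id'], record[0])  # by name and by position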
Example 7
with o.execute_sql("""select A.carno,
    A.cname As Acname ,A.cvalue Acvalue,
    B.cname As Bcname ,B.cvalue Bcvalue,
    C.cname As Ccname, C.cvalue Ccvalue,
    D.cname As Dcname ,D.cvalue Dcvalue,
    E.cname As Ecname ,E.cvalue Ecvalue,
    'location' As Qcname, location AS Qcvalue
 from
 (select carno, 'city' as cname, city as cvalue, area from  ofo_t_bike_location where dt=20171221 ) A
left outer join
(select carno, 'order_num' as cname, 1 as cvalue from  ofo_t_bike_location where dt=20171221
and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') B on A.carno = B.carno
left outer join
(select carno, 'area' as cname, area as cvalue from  ofo_t_bike_location where dt=20171221
and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') D on A.carno = D.carno
left outer join
(select carno, 'lock_type' as cname, lock_type as cvalue from  ofo_t_bike_location where dt=20171221
and TO_CHAR(locate_time, 'yyyymmdd') >= '20171214') E on A.carno = E.carno
left outer join
(select carno, concat_ws(';',cast(lng as string), cast(lat as string) ) As location from ofo_t_bike_location where dt=20171221) Q on A.carno = Q.carno
left outer join
(
select carno,'silent_tag' as cname, tag as cvalue from (
select t_last_day_silent.carno,
    case when  t_today_order.carno is not null and t_engine_jd.bicycle_no is null then 'zr_active'
    when t_today_order.carno is not null and t_engine_jd.bicycle_no is not null then 'engine_active'
    when t_today_order.carno is null then 'continue_silent' end as tag

from
    (select carno from (
        select carno from (
            select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_state where dt<=20171220
            ) a where rn=1 and dt<20171218
    union all
    select bicycle_no as carno from ofo_t_bicycle_mark_records where dt=20171221 and tags like '%silence%' and status=5 group by bicycle_no ) tmp group by carno ) tt ) t_last_day_silent
    left outer join
    (select carno from  ofo_t_order_hg where dt=20171221
    group by carno) t_today_order on t_last_day_silent.carno = t_today_order.carno
    left outer join
    ( select bicycle_no from ofo_t_bicycle_mark_records where dt=20171221 and status=5 and  tags like '%silence%' group by bicycle_no) t_engine_jd
    on t_last_day_silent.carno = t_engine_jd.bicycle_no
union all
select t_today_silent.carno, 'new_add_silent' as tag
from
    (select carno from  (
        select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_state where dt<=20171220
            ) a where rn=1 and dt<20171218
    union all
    select bicycle_no as carno from ofo_t_bicycle_mark_records where dt=20171221 and tags like '%silence%' and status=5 group by bicycle_no ) tmp group by carno ) t_last_day_silent
    right outer join
    (select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_hg where dt<=20171221
            ) a where rn=1 and createtime < '2017-12-19 00:00:00' ) t_today_silent on t_last_day_silent.carno = t_today_silent.carno
where t_last_day_silent.carno is null
    ) AA
union all
  select tt.carno, 'silent_tag' as cname,'continue_acitve' as cvalue from (
select carno from ofo_t_order_hg where dt>='20171218' group by carno
) tt left outer join
(select carno,'silent_tag' as cname, tag as cvalue from (
select t_last_day_silent.carno,
    case when  t_today_order.carno is not null and t_engine_jd.bicycle_no is null then 'zr_active'
    when t_today_order.carno is not null and t_engine_jd.bicycle_no is not null then 'engine_active'
    when t_today_order.carno is null then 'continue_silent' end as tag

from
    (select carno from (
        select carno from (
            select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_state where dt<=20171220
            ) a where rn=1 and dt<20171218
    union all
    select bicycle_no as carno from ofo_t_bicycle_mark_records where dt=20171221 and tags like '%silence%' and status=5 group by bicycle_no ) tmp group by carno ) tt ) t_last_day_silent
    left outer join
    (select carno from  ofo_t_order_hg where dt=20171221
    group by carno) t_today_order on t_last_day_silent.carno = t_today_order.carno
    left outer join
    ( select bicycle_no from ofo_t_bicycle_mark_records where dt=20171221 and tags like '%silence%' and status=5 group by bicycle_no) t_engine_jd
    on t_last_day_silent.carno = t_engine_jd.bicycle_no
union all
select t_today_silent.carno, 'new_add_silent' as tag
from
    (select carno from (
        select carno from (
            select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_state where dt<=20171220
            ) a where rn=1 and dt<20171218
    union all
    select bicycle_no as carno from ofo_t_bicycle_mark_records where dt=20171221 and tags like '%silence%' and status=5 group by bicycle_no ) tmp group by carno ) tt ) t_last_day_silent
    right outer join
    (select carno from (select *,row_number() over(partition by carno order by createtime desc ) rn from  ofo_t_order_hg where dt<=20171221
            ) a where rn=1 and createtime < '2017-12-19 00:00:00' ) t_today_silent on t_last_day_silent.carno = t_today_silent.carno
where t_last_day_silent.carno is null
    ) AB ) BB on tt.carno = BB.carno where BB.carno is null
) C on A.carno = C.carno

left outer join (
select carno from  ofo_t_station_record where dt<='20171221' and dt>='20171214' and record_type=1
group by carno
) SS on A.carno = SS.carno
where SS.carno is null""").open_reader() as reader:
    # minimal placeholder body so the with-block is valid; the snippet ended here
    for record in reader:
        print(record)
Example 8
class MaxComputeConnection(Connection):
    """MaxCompute connection, this class uses ODPS object to establish
    connection with maxcompute

    Args:
        conn_uri: uri in format:
        maxcompute://access_id:access_key@service.com/api?curr_project=test_ci&scheme=http
    """
    def __init__(self, conn_uri):
        super(MaxComputeConnection, self).__init__(conn_uri)
        user, pwd, endpoint, proj = MaxComputeConnection.get_uri_parts(
            conn_uri)
        self.driver = "maxcompute"
        self.params["database"] = proj
        self.endpoint = endpoint
        self._conn = ODPS(user, pwd, project=proj, endpoint=endpoint)

    @staticmethod
    def get_uri_parts(uri):
        """Get username, password, endpoint, projectfrom given uri

        Args:
            uri: a valid maxcompute connection uri

        Returns:
            A tuple (username, password, endpoint, project)
        """
        uripts = urlparse(uri)
        params = parse_qs(uripts.query)
        # compose an endpoint, only keep the host and path and replace scheme
        endpoint = uripts._replace(scheme=params.get("scheme", ["http"])[0],
                                   query="",
                                   netloc=uripts.hostname)
        endpoint = endpoint.geturl()
        return (uripts.username, uripts.password, endpoint,
                params.get("curr_project", [""])[0])

    def _get_result_set(self, statement):
        try:
            instance = self._conn.execute_sql(statement)
            return MaxComputeResultSet(instance)
        except Exception:
            raise  # re-raise with the original traceback intact

    def close(self):
        if self._conn:
            self._conn = None

    def get_table_schema(self, table_name):
        schema = self._conn.get_table(table_name).schema
        return [(c.name, str(c.type).upper()) for c in schema.columns]

    def persist_table(self, table):
        sql = "ALTER TABLE %s DISABLE LIFECYCLE;" % table
        self.execute(sql)

    def write_table(self,
                    table_name,
                    rows,
                    compress_option=COMPRESS_ODPS_ZLIB):
        """Append rows to given table, this is a driver specific api

        Args:
            table_name: the table to write
            rows: list of rows, each row is a data tuple,
                like [(1,True,"ok"),(2,False,"bad")]
            compress_option: the compress options defined in
                tunnel.CompressOption.CompressAlgorithm
        """
        self._conn.write_table(table_name,
                               rows,
                               compress_option=compress_option)
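
A hedged usage sketch of the driver-specific methods above; the URI follows the docstring format, the table name and rows are illustrative, and persist_table relies on an execute method inherited from the Connection base class:

conn = MaxComputeConnection(
    'maxcompute://my_id:my_key@service.com/api?curr_project=test_ci&scheme=http')
conn.write_table('t1', [(1, True, 'ok'), (2, False, 'bad')])
conn.persist_table('t1')
conn.close()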
Example 9
class myOdps:

    # initialize an ODPS connection object
    def __init__(self, access_id, secret_access_key, project):
        self.odps = ODPS(access_id=access_id,
                         secret_access_key=secret_access_key,
                         project=project,
                         endpoint="http://service.odps.aliyun.com/api")

    # list all table names
    def get_all_tabel(self):
        # return: all the table names
        table_name = []
        for table in self.odps.list_tables():
            table_name.append(table.name)
        return table_name

    # create a table
    def creat_table(self, table_name, columns=None, if_not_exists=True):
        # table_name: table name
        # columns: tuple of columns and partitions, e.g. ('num bigint, num2 double', 'pt string')
        # if_not_exists: True -> only create the table if it does not exist
        # lifecycle: 28 -> table lifecycle in days
        # return: the table object
        try:
            return self.odps.create_table(table_name,
                                          columns,
                                          if_not_exists=if_not_exists)
        except Exception:
            return self.odps.get_table(table_name)

    # get a table directly by name
    def get_a_table(self, table_name):
        # table_name: table name
        # return: the table object
        return self.odps.get_table(table_name)

    # drop a table
    def drop_a_table(self, table_name):
        # table_name: table name
        # return: the result of the delete operation
        return self.odps.delete_table(table_name)

    # get all partitions of a table
    def get_partitions(self, table):
        # table: table object
        # return: all partitions of the table
        partitions = []
        for partition in table.partitions:
            partitions.append(partition.name)
        return partitions

    # ============= data upload ============

    # upload a csv to odps and create the table; the csv must have a header row

    def uploadCSV(self, csvFilename, tableName, sep=",", pt=None):
        """
        :param csvFilename: 传入本地csv的路径,必须要有表头
        :param tableName:  上传到odps时的表名
        :param sep:   csv的分隔符
        :param pt:   是否创建分区
        """
        print("start upload ...\n")
        df = pd.read_csv(csvFilename, sep=sep)
        shape0 = df.shape[0]
        columns = [
            Column(name=f"{x}", type='string', comment='the column')
            for x in df.columns
        ]

        if pt:
            partitions = [
                Partition(name='pt', type='string', comment='the partition')
            ]
            schema = Schema(columns=columns, partitions=partitions)
            table = self.creat_table(tableName, schema)
            table.create_partition(f"pt={pt}", if_not_exists=True)
            table_columns = [i.name for i in table.schema.columns]
            with table.open_writer(partition=f"pt={pt}") as writer:
                for index in df.index:
                    print(f"{index+1}/{shape0} in {tableName}  ...")
                    item_dict = dict(df.loc[index])
                    item = []
                    for field in table_columns[:-1]:
                        item.append(item_dict.get(field, ''))
                    item.append(pt)
                    writer.write(item)
        else:
            schema = Schema(columns=columns)
            table = self.creat_table(tableName, schema)
            table_columns = [i.name for i in table.schema.columns]
            with table.open_writer(partition=None) as writer:
                for index in df.index:
                    print(f"{index+1}/{shape0} in {tableName}  ...")
                    item_dict = dict(df.loc[index])
                    item = []
                    for field in table_columns[:-1]:
                        item.append(item_dict.get(field, ''))
                    writer.write(item)
        print("\n\n upload finish ...")

    # download during upload: the field at index 1 of each data row is a download URL;
    # fetch it, then upload the completed row
    def downloaAndUp(self,
                     csvFilename,
                     tableName,
                     sep=",",
                     urlIndex=1,
                     pt=None):
        """
        :param csvFilename: 传入本地csv的路径,必须要有表头
        :param tableName:  上传到odps时的表名
        :param sep:   csv的分隔符
        :param urlIndex: url字段的坐标位置
        """
        print("start upload ...\n")
        f = open(csvFilename, encoding='utf-8')
        first_line = f.readline().strip('\n').split(sep)  # header row
        columns = [
            Column(name=f"{x}", type='string', comment='the column')
            for x in first_line
        ]

        if pt:
            partitions = [
                Partition(name='pt', type='string', comment='the partition')
            ]
            schema = Schema(columns=columns, partitions=partitions)
            table = self.creat_table(tableName, schema)
            table.create_partition(f"pt={pt}", if_not_exists=True)
            with table.open_writer(partition=f"pt={pt}") as writer:
                for index, line in enumerate(f):
                    print(f"{index} in {tableName}  ...")
                    item = line.strip('\n').split(sep)
                    item.append(pt)
                    resp = download(item[urlIndex])
                    data = resp.text
                    if sys.getsizeof(data) <= 8 * 1024 * 1000:
                        item[urlIndex] = data
                    else:
                        print(f"failed in {item[0]}")
                    writer.write(item)
        else:
            schema = Schema(columns=columns)
            table = self.creat_table(tableName, schema)
            with table.open_writer(partition=None) as writer:
                for index, line in enumerate(f):
                    print(f"{index}  in {tableName}  ...")
                    item = line.strip('\n').split(sep)
                    resp = download(item[urlIndex])
                    data = resp.text
                    if sys.getsizeof(data) <= 8 * 1024 * 1000:
                        item[urlIndex] = data
                    else:
                        print(f"failed in {item[0]}")
                    writer.write(item)
        print("\n\n upload finish ...")
        f.close()

    # =========== run sql =========
    # sql query
    def select_sql(self, sql):
        # return: the query results as a list of records
        # (read inside the with-block; the reader is closed once it exits)
        with self.odps.execute_sql(sql).open_reader() as reader:
            return list(reader)
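
A brief usage sketch of the helper class, assuming valid credentials and a local data.csv with a header row (all names are illustrative):

my = myOdps('<access_id>', '<secret_access_key>', 'my_project')
print(my.get_all_tabel())
my.uploadCSV('data.csv', 'my_table', pt='20230101')
for record in my.select_sql('select * from my_table limit 10'):
    print(record)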
Example 10
# the access id and secret below are placeholders; the snippet began mid-call
odps = ODPS('<access_id>', '<secret_access_key>',
            'GyyStatistical',
            endpoint='https://service.odps.aliyun.com/api')

# get a specific project
project = odps.get_project('GyyStatistical')
# get the default project
# project = odps.get_project()

# list all tables in the project
print('---- list all tables in the project: start ----')
for table in odps.list_tables():
    print(table)
print('---- list all tables in the project: end ----\n')

print('---- run SQL synchronously: start ----')
instance = odps.execute_sql('select * from ots_arealist1')
with instance.open_reader() as reader:
    for record in reader:
        print(record)
        print(type(record))
print('---- run SQL synchronously: end ----\n')

# print('---- run SQL asynchronously: start ----')
# instance = odps.run_sql('select * from ots_arealist')
# instance.wait_for_success()
# with instance.open_reader() as reader:
#     for record in reader:
#         print(record)
# print('---- run SQL asynchronously: end ----\n')
#
# # get a handle to a specific table
Example 11
class ODPSIOTest(unittest.TestCase):
    def setUp(self):
        self._project = os.environ[ODPSConfig.PROJECT_NAME]
        self._access_id = os.environ[ODPSConfig.ACCESS_ID]
        self._access_key = os.environ[ODPSConfig.ACCESS_KEY]
        self._endpoint = os.environ.get(ODPSConfig.ENDPOINT)
        self._test_read_table = "test_odps_reader_%d_%d" % (
            int(time.time()),
            random.randint(1, 101),
        )
        self._test_write_table = "test_odps_writer_%d_%d" % (
            int(time.time()),
            random.randint(1, 101),
        )
        self._odps_client = ODPS(self._access_id, self._access_key,
                                 self._project, self._endpoint)
        self.create_iris_odps_table()

    def test_read_to_iterator(self):
        reader = ODPSReader(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_read_table,
            None,
            4,
            None,
        )
        records_iter = reader.to_iterator(1, 0, 50, 2, False, None)
        records = list(records_iter)
        self.assertEqual(len(records), 6,
                         "Unexpected number of batches: %d" % len(records))
        flattened_records = [record for batch in records for record in batch]
        self.assertEqual(
            len(flattened_records),
            220,
            "Unexpected number of total records: %d" % len(flattened_records),
        )

    def test_write_odps_to_recordio_shards_from_iterator(self):
        reader = ODPSReader(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_read_table,
            None,
            4,
            None,
        )
        records_iter = reader.to_iterator(1, 0, 50, 2, False, None)
        with tempfile.TemporaryDirectory() as output_dir:
            write_recordio_shards_from_iterator(
                records_iter,
                ["f" + str(i) for i in range(5)],
                output_dir,
                records_per_shard=50,
            )
            self.assertEqual(len(os.listdir(output_dir)), 5)

    def test_write_from_iterator(self):
        columns = ["num", "num2"]
        column_types = ["bigint", "double"]

        # If the table doesn't exist yet
        writer = ODPSWriter(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_write_table,
            columns,
            column_types,
        )
        writer.from_iterator(iter([[1, 0.5], [2, 0.6]]), 2)
        table = self._odps_client.get_table(self._test_write_table,
                                            self._project)
        self.assertEqual(table.schema.names, columns)
        self.assertEqual(table.schema.types, column_types)
        self.assertEqual(table.to_df().count(), 1)

        # If the table already exists
        writer = ODPSWriter(
            self._project,
            self._access_id,
            self._access_key,
            self._endpoint,
            self._test_write_table,
        )
        writer.from_iterator(iter([[1, 0.5], [2, 0.6]]), 2)
        table = self._odps_client.get_table(self._test_write_table,
                                            self._project)
        self.assertEqual(table.schema.names, columns)
        self.assertEqual(table.schema.types, column_types)
        self.assertEqual(table.to_df().count(), 2)

    def create_iris_odps_table(self):
        sql_tmpl = """
        DROP TABLE IF EXISTS {PROJECT_NAME}.{TABLE_NAME};
        CREATE TABLE {PROJECT_NAME}.{TABLE_NAME} (
               sepal_length DOUBLE,
               sepal_width  DOUBLE,
               petal_length DOUBLE,
               petal_width  DOUBLE,
               class BIGINT);

        INSERT INTO {PROJECT_NAME}.{TABLE_NAME} VALUES
        (6.4,2.8,5.6,2.2,2),
        (5.0,2.3,3.3,1.0,1),
        (4.9,2.5,4.5,1.7,2),
        (4.9,3.1,1.5,0.1,0),
        (5.7,3.8,1.7,0.3,0),
        (4.4,3.2,1.3,0.2,0),
        (5.4,3.4,1.5,0.4,0),
        (6.9,3.1,5.1,2.3,2),
        (6.7,3.1,4.4,1.4,1),
        (5.1,3.7,1.5,0.4,0),
        (5.2,2.7,3.9,1.4,1),
        (6.9,3.1,4.9,1.5,1),
        (5.8,4.0,1.2,0.2,0),
        (5.4,3.9,1.7,0.4,0),
        (7.7,3.8,6.7,2.2,2),
        (6.3,3.3,4.7,1.6,1),
        (6.8,3.2,5.9,2.3,2),
        (7.6,3.0,6.6,2.1,2),
        (6.4,3.2,5.3,2.3,2),
        (5.7,4.4,1.5,0.4,0),
        (6.7,3.3,5.7,2.1,2),
        (6.4,2.8,5.6,2.1,2),
        (5.4,3.9,1.3,0.4,0),
        (6.1,2.6,5.6,1.4,2),
        (7.2,3.0,5.8,1.6,2),
        (5.2,3.5,1.5,0.2,0),
        (5.8,2.6,4.0,1.2,1),
        (5.9,3.0,5.1,1.8,2),
        (5.4,3.0,4.5,1.5,1),
        (6.7,3.0,5.0,1.7,1),
        (6.3,2.3,4.4,1.3,1),
        (5.1,2.5,3.0,1.1,1),
        (6.4,3.2,4.5,1.5,1),
        (6.8,3.0,5.5,2.1,2),
        (6.2,2.8,4.8,1.8,2),
        (6.9,3.2,5.7,2.3,2),
        (6.5,3.2,5.1,2.0,2),
        (5.8,2.8,5.1,2.4,2),
        (5.1,3.8,1.5,0.3,0),
        (4.8,3.0,1.4,0.3,0),
        (7.9,3.8,6.4,2.0,2),
        (5.8,2.7,5.1,1.9,2),
        (6.7,3.0,5.2,2.3,2),
        (5.1,3.8,1.9,0.4,0),
        (4.7,3.2,1.6,0.2,0),
        (6.0,2.2,5.0,1.5,2),
        (4.8,3.4,1.6,0.2,0),
        (7.7,2.6,6.9,2.3,2),
        (4.6,3.6,1.0,0.2,0),
        (7.2,3.2,6.0,1.8,2),
        (5.0,3.3,1.4,0.2,0),
        (6.6,3.0,4.4,1.4,1),
        (6.1,2.8,4.0,1.3,1),
        (5.0,3.2,1.2,0.2,0),
        (7.0,3.2,4.7,1.4,1),
        (6.0,3.0,4.8,1.8,2),
        (7.4,2.8,6.1,1.9,2),
        (5.8,2.7,5.1,1.9,2),
        (6.2,3.4,5.4,2.3,2),
        (5.0,2.0,3.5,1.0,1),
        (5.6,2.5,3.9,1.1,1),
        (6.7,3.1,5.6,2.4,2),
        (6.3,2.5,5.0,1.9,2),
        (6.4,3.1,5.5,1.8,2),
        (6.2,2.2,4.5,1.5,1),
        (7.3,2.9,6.3,1.8,2),
        (4.4,3.0,1.3,0.2,0),
        (7.2,3.6,6.1,2.5,2),
        (6.5,3.0,5.5,1.8,2),
        (5.0,3.4,1.5,0.2,0),
        (4.7,3.2,1.3,0.2,0),
        (6.6,2.9,4.6,1.3,1),
        (5.5,3.5,1.3,0.2,0),
        (7.7,3.0,6.1,2.3,2),
        (6.1,3.0,4.9,1.8,2),
        (4.9,3.1,1.5,0.1,0),
        (5.5,2.4,3.8,1.1,1),
        (5.7,2.9,4.2,1.3,1),
        (6.0,2.9,4.5,1.5,1),
        (6.4,2.7,5.3,1.9,2),
        (5.4,3.7,1.5,0.2,0),
        (6.1,2.9,4.7,1.4,1),
        (6.5,2.8,4.6,1.5,1),
        (5.6,2.7,4.2,1.3,1),
        (6.3,3.4,5.6,2.4,2),
        (4.9,3.1,1.5,0.1,0),
        (6.8,2.8,4.8,1.4,1),
        (5.7,2.8,4.5,1.3,1),
        (6.0,2.7,5.1,1.6,1),
        (5.0,3.5,1.3,0.3,0),
        (6.5,3.0,5.2,2.0,2),
        (6.1,2.8,4.7,1.2,1),
        (5.1,3.5,1.4,0.3,0),
        (4.6,3.1,1.5,0.2,0),
        (6.5,3.0,5.8,2.2,2),
        (4.6,3.4,1.4,0.3,0),
        (4.6,3.2,1.4,0.2,0),
        (7.7,2.8,6.7,2.0,2),
        (5.9,3.2,4.8,1.8,1),
        (5.1,3.8,1.6,0.2,0),
        (4.9,3.0,1.4,0.2,0),
        (4.9,2.4,3.3,1.0,1),
        (4.5,2.3,1.3,0.3,0),
        (5.8,2.7,4.1,1.0,1),
        (5.0,3.4,1.6,0.4,0),
        (5.2,3.4,1.4,0.2,0),
        (5.3,3.7,1.5,0.2,0),
        (5.0,3.6,1.4,0.2,0),
        (5.6,2.9,3.6,1.3,1),
        (4.8,3.1,1.6,0.2,0);
        """
        self._odps_client.execute_sql(
            sql_tmpl.format(PROJECT_NAME=self._project,
                            TABLE_NAME=self._test_read_table),
            hints={"odps.sql.submit.mode": "script"},
        )

    def tearDown(self):
        self._odps_client.delete_table(self._test_write_table,
                                       self._project,
                                       if_exists=True)
        self._odps_client.delete_table(self._test_read_table,
                                       self._project,
                                       if_exists=True)
Example 12
sql = '''
SELECT plan_id,count(id) group_count
FROM mysql_t_group_dim
where ds='${bdp.system.bizdate}'
group by plan_id
order by group_count desc 
limit 10
'''

import datetime

ds = (datetime.datetime.now() + datetime.timedelta(days=-1)).strftime('%Y%m%d')
print(ds)

sql = sql.replace('${bdp.system.bizdate}', ds)

instance_id = o.execute_sql(sql)

reader = instance_id.open_reader()
print(reader.count)
print(reader._schema)  # _schema is a private PyODPS attribute

# process row by row
for row in reader:
    # print(row.values)
    print(dict(zip(('plan', 'groupcount'), row.values)))

# process everything at once into a DataFrame
result_df = reader.to_pandas()

print(result_df)