コード例 #1
0
    def testTokenizedSignServerAccount(self):
        """Exercise ``SignServerAccount`` against a token-protected ``SignServer``.

        A local sign server is started with a random token and the test
        account's credentials. A client built without the token is expected
        to raise ``SignServerError``; a client built with the server's token
        is then used to delete, create, check and drop a test table. The
        server is always stopped, even when an assertion fails.
        """
        server = SignServer(token=str(uuid.uuid4()))
        server.accounts[
            self.odps.account.access_id] = self.odps.account.secret_access_key
        try:
            # Port 0 lets the OS pick a free port; the real address is read
            # back from ``server.server.server_address``.
            server.start(('127.0.0.1', 0))

            # Client without a token: the request must be rejected.
            account = SignServerAccount(self.odps.account.access_id,
                                        server.server.server_address)
            odps = ODPS(None,
                        None,
                        self.odps.project,
                        self.odps.endpoint,
                        account=account)
            self.assertRaises(
                SignServerError,
                lambda: odps.delete_table(tn('test_sign_account_table'),
                                          if_exists=True))

            # Client carrying the server token: table operations succeed.
            account = SignServerAccount(self.odps.account.access_id,
                                        server.server.server_address,
                                        token=server.token)
            odps = ODPS(None,
                        None,
                        self.odps.project,
                        self.odps.endpoint,
                        account=account)
            odps.delete_table(tn('test_sign_account_table'), if_exists=True)
            t = odps.create_table(tn('test_sign_account_table'),
                                  'col string',
                                  lifecycle=1)
            self.assertTrue(odps.exist_table(tn('test_sign_account_table')))
            # ``async`` is a reserved word since Python 3.7, so the keyword
            # argument has to be spelled ``async_``.
            t.drop(async_=True)
        finally:
            server.stop()
コード例 #2
0
    def testBearerTokenAccount(self):
        """Check that a bearer token taken from a logview URL grants read-only access.

        A small table is written and counted with the regular account. The
        token is cut out of the instance's logview address and used to build
        a ``BearerTokenAccount`` client, which must see the same task result
        and summary but must not be allowed to create tables. A made-up token
        must fail with an ``ODPSError``.
        """
        # ``tn`` is assumed to return the same name for the same argument
        # (the original relied on that across three separate calls).
        table_name = tn('test_bearer_token_account_table')
        self.odps.delete_table(table_name, if_exists=True)
        t = self.odps.create_table(table_name, 'col string', lifecycle=1)
        with t.open_writer() as writer:
            writer.write([['val1'], ['val2'], ['val3']])

        query = 'select count(*) from {0}'.format(table_name)
        inst = self.odps.execute_sql(query, async_=True)
        inst.wait_for_success()
        task_name = inst.get_task_names()[0]

        # Everything after ``token=`` in the logview URL is the bearer token.
        marker = 'token='
        logview_address = inst.get_logview_address()
        token_start = logview_address.find(marker) + len(marker)
        token = logview_address[token_start:]

        bearer_token_odps = ODPS(None,
                                 None,
                                 self.odps.project,
                                 self.odps.endpoint,
                                 account=BearerTokenAccount(token=token))
        bearer_token_instance = bearer_token_odps.get_instance(inst.id)

        self.assertEqual(inst.get_task_result(task_name),
                         bearer_token_instance.get_task_result(task_name))
        self.assertEqual(inst.get_task_summary(task_name),
                         bearer_token_instance.get_task_summary(task_name))

        # The logview token must not permit creating tables.
        with self.assertRaises(errors.NoPermission):
            bearer_token_odps.create_table(
                tn('test_bearer_token_account_table_test1'),
                'col string',
                lifecycle=1)

        # A bogus token must be rejected outright.
        bearer_token_odps = ODPS(None,
                                 None,
                                 self.odps.project,
                                 self.odps.endpoint,
                                 account=BearerTokenAccount(token='fake-token'))

        with self.assertRaises(errors.ODPSError):
            bearer_token_odps.create_table(
                tn('test_bearer_token_account_table_test2'),
                'col string',
                lifecycle=1)
コード例 #3
0
    def check_instance_idle(self):
        """Stop the instance once no session graph has been active for too long.

        Walks the graph infos of every session ref. A graph without an
        ``end_time`` counts as still running; otherwise the latest
        ``end_time`` (or ``self._last_activity_time`` if that is later) is
        taken as the last activity. When nothing is running and that moment
        is more than ``self._idle_timeout`` before ``time.time()``, the
        instance named by the ``MARS_K8S_POD_NAMESPACE`` environment variable
        is stopped through ODPS. Otherwise the check is re-posted to
        ``self.ref()`` with ``_delay=10``.
        """
        latest_activity = self._last_activity_time
        busy = False
        for session_ref in self._session_refs.values():
            for graph_info in session_ref.get_graph_infos().values():
                finished_at = graph_info.get('end_time')
                if finished_at is None:
                    busy = True
                    break
                latest_activity = max(finished_at, latest_activity)
            if busy:
                break

        if busy or not (latest_activity < time.time() - self._idle_timeout):
            # Still active (or recently active): look again later.
            self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
            return

        # timeout: we need to kill the instance
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid.runtime import context

        logger.warning('Timeout met, killing the instance now.')

        account = BearerTokenAccount(context().get_bearer_token())
        project = os.environ['ODPS_PROJECT_NAME']
        endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
        odps_client = ODPS(None,
                           None,
                           account=account,
                           project=project,
                           endpoint=endpoint)

        odps_client.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE'])
コード例 #4
0
def load2excel(filepath, begin, end):
    """Dump the ``stat_table`` rows with ``begin <= ds <= end`` into an Excel file.

    Args:
        filepath: Destination passed to ``workbook.save``.
        begin: Lower bound substituted into the ``ds>=`` condition.
        end: Upper bound substituted into the ``ds<=`` condition.

    The first worksheet row holds the column names reported by the reader's
    schema; each record follows on its own row starting at row 2.
    """
    odps_client = ODPS(ACCESS_KEY_ID,
                       ACCESS_KEY_SECRET,
                       PROJECT,
                       endpoint='http://service.odps.aliyun.com/api')

    print(odps_client)

    sql = "select * from stat_table where ds>={begin} and ds<={end}".format(begin=begin, end=end)
    with odps_client.execute_sql(sql).open_reader() as reader:
        print(sql)
        print(reader.count)
        column_names = reader._schema.names
        print(column_names)

        workbook = openpyxl.Workbook()
        worksheet = workbook.create_sheet('Sheet')

        # Header row: openpyxl rows and columns are 1-based.
        for column, title in enumerate(column_names, start=1):
            worksheet.cell(1, column, title)

        # Data rows start right below the header; write each value of the
        # record into the current row, one column at a time.
        for row_number, record in enumerate(reader, start=2):
            for column, value in enumerate(record.values, start=1):
                worksheet.cell(row_number, column, value)
        workbook.save(filename=filepath)
コード例 #5
0
def _handle_terminate_instance(sock):
    """Handle a "terminate instance" command read from ``sock``.

    The request consists of a 4-byte little-endian unsigned length followed
    by a pickled dict that must contain the key ``instance_id``. When the
    Cupid runtime context is ready, an ODPS client authenticated with the
    context's bearer token stops that instance; otherwise only a warning is
    logged. On any failure the exception is logged and reported back through
    ``_write_request_result``. No result is written on the success path.

    Args:
        sock: Connected socket-like object providing ``recv``.
    """
    from cupid.runtime import context, RuntimeContext
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with key ``instance_id``
        # NOTE(review): pickle.loads runs arbitrary code when fed untrusted
        # bytes — confirm that only trusted local peers can reach this socket.
        cmd_body = pickle.loads(sock.recv(cmd_len))

        instance_id = cmd_body['instance_id']

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            project = os.environ['ODPS_PROJECT_NAME']
            endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
            o = ODPS(None,
                     None,
                     account=account,
                     project=project,
                     endpoint=endpoint)

            o.stop_instance(instance_id)
    except:  # noqa: E722 - mirror the sibling handlers: report every failure to the peer
        # The previous message ('Failed to put kv value') was copied from a
        # different handler and misreported what went wrong.
        logger.exception('Failed to terminate instance')
        _write_request_result(sock, False, exc_info=sys.exc_info())
コード例 #6
0
    def setUp(self):
        """Create a uniquely named iris test table and an ``ODPSDataReader`` on it.

        Project name, access id and access key must be present in the
        environment; endpoint and tunnel endpoint are optional and default to
        ``None`` when unset.
        """
        env = os.environ
        self.project = env[MaxComputeConfig.PROJECT_NAME]
        access_id = env[MaxComputeConfig.ACCESS_ID]
        access_key = env[MaxComputeConfig.ACCESS_KEY]
        endpoint = env.get(MaxComputeConfig.ENDPOINT)
        tunnel_endpoint = env.get(MaxComputeConfig.TUNNEL_ENDPOINT)

        # Timestamp plus a random number keeps table names from colliding.
        timestamp = int(time.time())
        nonce = random.randint(1, 101)
        self.test_table = "test_odps_data_reader_%d_%d" % (timestamp, nonce)

        self.odps_client = ODPS(access_id, access_key, self.project, endpoint)
        create_iris_odps_table(self.odps_client, self.project, self.test_table)
        self.records_per_task = 50

        reader_kwargs = dict(
            project=self.project,
            access_id=access_id,
            access_key=access_key,
            endpoint=endpoint,
            table=self.test_table,
            tunnel_endpoint=tunnel_endpoint,
            num_processes=1,
            records_per_task=self.records_per_task,
        )
        self.reader = ODPSDataReader(**reader_kwargs)
コード例 #7
0
    def check_instance_idle(self):
        """Stop the instance after ``self._idle_timeout`` without activity.

        The last-active timestamp is refreshed from
        ``self._get_service_activity_info()``: a changed service timestamp is
        adopted as-is; an unchanged one with running work bumps the timestamp
        to ``time.time()``. If the timestamp is older than the idle timeout,
        the instance named by ``MARS_K8S_POD_NAMESPACE`` is stopped through
        ODPS. Otherwise the timestamp is stored in the Cupid kv store under
        ``CUPID_LAST_IDLE_TIME_KEY`` and the check is re-posted to
        ``self.ref()`` with ``_delay=10``.
        """
        from cupid.runtime import context

        has_running, service_time = self._get_service_activity_info()
        if service_time != self._last_active_time_from_service:
            self._last_active_time = service_time
            self._last_active_time_from_service = service_time
        elif has_running:
            self._last_active_time = time.time()

        if not (self._last_active_time < time.time() - self._idle_timeout):
            # Not idle long enough: record the timestamp and check again later.
            kv_store = context().kv_store()
            kv_store[CUPID_LAST_IDLE_TIME_KEY] = str(self._last_active_time)
            self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
            return

        # timeout: we need to kill the instance
        from odps import ODPS
        from odps.accounts import BearerTokenAccount

        logger.warning('Timeout met, killing the instance now.')

        account = BearerTokenAccount(context().get_bearer_token())
        project = os.environ['ODPS_PROJECT_NAME']
        endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
        odps_client = ODPS(None,
                           None,
                           account=account,
                           project=project,
                           endpoint=endpoint)

        odps_client.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE'])
コード例 #8
0
    def execute(cls, ctx, op):
        """Commit the Cupid upload session for a terminal op; always emit an empty frame.

        For ``op.is_terminal`` an ODPS client is built from the Cupid bearer
        token. The project comes from ``ODPS_PROJECT_NAME`` when set
        (otherwise from ``op.odps_params``) and the endpoint from
        ``ODPS_RUNTIME_ENDPOINT`` when set (otherwise from
        ``op.odps_params``). The upload session identified by
        ``op.cupid_handle`` is then committed. In every case an empty
        ``pandas.DataFrame`` is stored under the first output's key.
        """
        import pandas as pd
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.io.table import CupidTableUploadSession

        if op.is_terminal:
            account = BearerTokenAccount(context().get_bearer_token())
            env_project = os.environ.get('ODPS_PROJECT_NAME')
            params = op.odps_params.copy()
            if env_project:
                params['project'] = env_project
            endpoint = (os.environ.get('ODPS_RUNTIME_ENDPOINT')
                        or params['endpoint'])
            odps_client = ODPS(None,
                               None,
                               account=account,
                               project=params['project'],
                               endpoint=endpoint)
            cupid_session = CupidSession(odps_client)

            # ``op.table_name`` is expected in ``project.table`` form.
            project_name, table_name = op.table_name.split('.')
            session_kwargs = dict(session=cupid_session,
                                  table_name=table_name,
                                  project_name=project_name,
                                  handle=op.cupid_handle,
                                  blocks=op.blocks)
            CupidTableUploadSession(**session_kwargs).commit(
                overwrite=op.overwrite)

        ctx[op.outputs[0].key] = pd.DataFrame()
コード例 #9
0
    def _execute_in_cupid(cls, ctx, op):
        """Write the op's input data to an ODPS table through the Cupid service.

        An ODPS client authenticated with the Cupid bearer token looks up the
        target table's schema. The data stored under the first input's key is
        then handed to ``CupidServiceClient.write_table_data`` together with
        the block/handle configuration. An empty ``pandas.DataFrame`` is
        stored as the op's output.
        """
        import os

        import pandas as pd
        from odps import ODPS
        from odps.accounts import BearerTokenAccount

        cupid_client = CupidServiceClient()
        payload = ctx[op.inputs[0].key]

        account = BearerTokenAccount(cupid_client.get_bearer_token())
        env_project = os.environ.get('ODPS_PROJECT_NAME')
        params = op.odps_params.copy()
        if env_project:
            params['project'] = env_project
        endpoint = (os.environ.get('ODPS_RUNTIME_ENDPOINT')
                    or params['endpoint'])
        odps_client = ODPS(None,
                           None,
                           account=account,
                           project=params['project'],
                           endpoint=endpoint)
        table_schema = odps_client.get_table(op.table_name).schema
        # ``op.table_name`` is expected in ``project.table`` form.
        project_name, table_name = op.table_name.split('.')

        writer_config = {
            '_table_name': table_name,
            '_project_name': project_name,
            '_table_schema': table_schema,
            '_partition_spec': op.partition_spec,
            '_block_id': op.block_id,
            '_handle': op.cupid_handle,
        }
        cupid_client.write_table_data(writer_config, payload,
                                      op.write_batch_size)
        ctx[op.outputs[0].key] = pd.DataFrame()
コード例 #10
0
ファイル: get_etids.py プロジェクト: zhaozhao17/PythonSpider
def get_lt_etid():
    """Copy yesterday's head-hunter company ids from ODPS into the local DB.

    Queries ``et_jobs`` for distinct ``etid`` values of the partition dated
    one day before now with ``isheadhunter=1`` and upserts them into
    ``et_info_status`` in batches of 1000, stamping each row with the current
    time. Progress is printed along the way.

    Returns:
        ``reader.count`` of the ODPS result reader.

    NOTE(review): this block uses Python 2 ``print`` statements and will not
    parse on Python 3.
    """
    logging.info('12. 正在获取其他的企业 ')
    print utils.current_time(), '建立odps链接..'
    # NOTE(review): access id/key are hard-coded in source — they should be
    # loaded from configuration and these credentials rotated.
    o = ODPS('LTAIzEuNzcL6qJJ8', 'eUAgj9ijhWCvOQ3w5Uv3FkwhNxvPF2',
             'database_test', 'http://service.odps.aliyun.com/api')
    print utils.current_time(), '进行查询...'
    # Partition value: the local date 86400 seconds (one day) ago, as YYYYMMDD.
    pt = time.strftime('%Y%m%d', time.localtime(int(time.time() - 86400)))
    res = o.execute_sql(
        "select distinct etid from et_jobs where pt='{}' and isheadhunter=1".
        format(pt))
    print utils.current_time(), '处理查询结果...'
    # Buffer of 1-tuples, the parameter shape ``executemany`` is given below.
    etid_set = set()
    conn = utils.get_local_db()
    addtime = int(time.time())
    cnt = 0
    with res.open_reader() as reader:
        print utils.current_time(), '共需处理{}条!'.format(reader.count)
        for record in reader:
            etid_set.add((record['etid'], ))
            # Flush a full batch; the buffer grows one item at a time, so it
            # holds exactly 1000 entries when this triggers.
            if len(etid_set) >= 1000:
                conn.executemany(
                    "insert into et_info_status(etid,addtime) values(%s,{})on duplicate key update etid=values(etid), addtime=values(addtime)"
                    .format(addtime), list(etid_set))
                cnt += 1000
                print utils.current_time(), '当前已写入{}条!'.format(cnt)
                etid_set.clear()
    # Flush whatever is left after the last full batch.
    if len(etid_set) > 0:
        conn.executemany(
            "insert into et_info_status(etid,addtime) values(%s,{})on duplicate key update etid=values(etid), addtime=values(addtime)"
            .format(addtime), list(etid_set))
        cnt += len(etid_set)
        print utils.current_time(), '当前已写入{}条!'.format(cnt)
    conn.close()
    # NOTE(review): ``reader`` is read here after its ``with`` block has
    # exited — confirm ``count`` is still available on a closed reader.
    return reader.count
コード例 #11
0
ファイル: maxcompute.py プロジェクト: zlb1028/sqlflow
 def __init__(self, conn_uri):
     """Open a MaxCompute connection described by ``conn_uri``.

     The URI is split by ``MaxComputeConnection.get_uri_parts`` into user,
     password, endpoint and project; the project is also recorded as
     ``params["database"]``.
     """
     super(MaxComputeConnection, self).__init__(conn_uri)
     uri_parts = MaxComputeConnection.get_uri_parts(conn_uri)
     access_id, access_key, endpoint, project = uri_parts
     self.driver = "maxcompute"
     self.params["database"] = project
     self.endpoint = endpoint
     self._conn = ODPS(access_id,
                       access_key,
                       project=project,
                       endpoint=endpoint)
コード例 #12
0
    def tile(cls, op):
        """Split a table-write op into per-chunk writes plus a commit tree.

        A Cupid upload session is created for ``op.table_name``. Every chunk
        of the input dataframe gets its own ``DataFrameWriteTableSplit`` op
        with a unique block id. The resulting chunks are then combined, at
        most ``combine_size`` at a time, through non-terminal
        ``DataFrameWriteTableCommit`` ops until no more than ``combine_size``
        remain; those feed one terminal commit op that carries the block map
        and upload handle.

        Returns:
            The dataframes created by a copy of ``op`` whose only chunk is
            the terminal commit chunk.
        """
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context

        bearer_token = context().get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        o = ODPS(None, None, account=account, **op.odps_params)
        cupid_session = CupidSession(o)

        data_src = o.get_table(op.table_name)

        logger.debug('Start creating upload session from cupid.')
        upload_session = cupid_session.create_upload_session(data_src)

        input_df = op.inputs[0]

        out_chunks = []
        # Write/commit chunks carry no data, hence an all-zero shape.
        out_chunk_shape = (0,) * len(input_df.shape)
        blocks = {}
        for chunk in input_df.chunks:
            # Timestamp plus a dash-free UUID gives each block a unique id.
            block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace('-', '')
            chunk_op = DataFrameWriteTableSplit(dtypes=op.dtypes, table_name=op.table_name,
                                                partition_spec=op.partition_spec,
                                                cupid_handle=to_str(upload_session.handle),
                                                block_id=block_id, write_batch_size=op.write_batch_size)
            out_chunk = chunk_op.new_chunk([chunk], shape=out_chunk_shape, index=chunk.index, dtypes=chunk.dtypes)
            out_chunks.append(out_chunk)
            blocks[block_id] = op.partition_spec

        # build commit tree
        combine_size = 8
        chunks = out_chunks
        while len(chunks) > combine_size:
            new_chunks = []
            for i in range(0, len(chunks), combine_size):
                chks = chunks[i: i + combine_size]
                if len(chks) == 1:
                    # A lone leftover chunk needs no intermediate commit node.
                    chk = chks[0]
                else:
                    chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes, is_terminal=False)
                    chk = chk_op.new_chunk(chks, shape=out_chunk_shape, dtypes=op.dtypes)
                new_chunks.append(chk)
            chunks = new_chunks

        # The loop stops as soon as len(chunks) <= combine_size, so exactly
        # ``combine_size`` chunks is a legal outcome. The previous strict
        # ``<`` check raised AssertionError for e.g. exactly 8 input chunks.
        assert len(chunks) <= combine_size

        commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes, table_name=op.table_name, blocks=blocks,
                                                    cupid_handle=to_str(upload_session.handle),
                                                    overwrite=op.overwrite, odps_params=op.odps_params,
                                                    is_terminal=True)
        commit_table_chunk = commit_table_op.new_chunk(chunks, shape=out_chunk_shape, dtypes=op.dtypes)

        out_df = op.outputs[0]
        new_op = op.copy()
        return new_op.new_dataframes(op.inputs, shape=out_df.shape,
                                     dtypes=out_df.dtypes, chunks=[commit_table_chunk],
                                     nsplits=((0,),) * len(out_chunk_shape))
コード例 #13
0
 def _get_table_schema(self):
     """Return the schema of the ODPS table named in ``self._kwargs``.

     Reads ``access_id``, ``access_key``, ``project`` and ``table`` from
     ``self._kwargs`` (all required) plus the optional ``endpoint``.
     """
     settings = self._kwargs
     client = ODPS(
         access_id=settings["access_id"],
         secret_access_key=settings["access_key"],
         project=settings["project"],
         endpoint=settings.get("endpoint"),
     )
     return client.get_table(settings["table"]).schema
コード例 #14
0
    def _execute_arrow_tunnel(cls, ctx, op):
        """Upload the op's input dataframe to an ODPS table via the Arrow tunnel.

        The project comes from ``ODPS_PROJECT_NAME`` when set (otherwise from
        ``op.odps_params``) and the endpoint from ``ODPS_RUNTIME_ENDPOINT``
        when set (otherwise from ``op.odps_params``). Creating the upload
        session and writing the record batch are each retried up to
        ``options.retry_times`` times with a one-second pause between
        attempts; the last failure is re-raised. After block 0 is committed
        an empty ``pandas.DataFrame`` is stored as the op's output.
        """
        from odps import ODPS
        from odps.tunnel import TableTunnel
        import pyarrow as pa
        import pandas as pd

        project = os.environ.get('ODPS_PROJECT_NAME', None)
        odps_params = op.odps_params.copy()
        if project:
            odps_params['project'] = project
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(odps_params['access_id'],
                 odps_params['secret_access_key'],
                 project=odps_params['project'],
                 endpoint=endpoint)

        t = o.get_table(op.table_name)
        tunnel = TableTunnel(o, project=t.project)
        retry_times = options.retry_times

        retries = 0
        while True:
            try:
                if op.partition_spec is not None:
                    upload_session = tunnel.create_upload_session(
                        t.name, partition_spec=op.partition_spec)
                else:
                    upload_session = tunnel.create_upload_session(t.name)
                break
            except Exception:
                if retries >= retry_times:
                    raise
                # The counter was never advanced before, so a persistent
                # failure retried forever instead of giving up.
                retries += 1
                time.sleep(1)

        logger.debug('Start writing table %s split index: %s', op.table_name,
                     op.inputs[0].index)

        retries = 0
        while True:
            try:
                # Each attempt reopens block 0 and writes the whole batch.
                writer = upload_session.open_arrow_writer(0)
                arrow_rb = pa.RecordBatch.from_pandas(ctx[op.inputs[0].key])
                writer.write(arrow_rb)
                writer.close()
                break
            except Exception:
                if retries >= retry_times:
                    raise
                retries += 1
                time.sleep(1)

        upload_session.commit([0])
        logger.debug('Finish writing table %s split index: %s', op.table_name,
                     op.inputs[0].index)

        ctx[op.outputs[0].key] = pd.DataFrame()
コード例 #15
0
 def __init__(self, conn_uri):
     """Open a MaxCompute connection from the already-parsed connection URI.

     ``params["curr_project"]`` becomes the database/project. The endpoint
     is the URI with its scheme replaced by ``params["scheme"]``, its query
     removed and its netloc reduced to the bare hostname.
     """
     super().__init__(conn_uri)
     project = self.params["curr_project"]
     self.params["database"] = project
     # compose an endpoint, only keep the host and path and replace scheme
     endpoint_parts = self.uripts._replace(scheme=self.params["scheme"],
                                           query="",
                                           netloc=self.uripts.hostname)
     access_id = self.uripts.username
     access_key = self.uripts.password
     self._conn = ODPS(access_id,
                       access_key,
                       project=self.params["database"],
                       endpoint=endpoint_parts.geturl())
コード例 #16
0
 def setUp(self):
     """Read ODPS settings from the environment and build the test client.

     All four settings (project, access id, access key, endpoint) must be
     present in the environment. The write table gets a name made unique by
     a timestamp and a random number.
     """
     env = os.environ
     self._project = env[ODPSConfig.PROJECT_NAME]
     self._access_id = env[ODPSConfig.ACCESS_ID]
     self._access_key = env[ODPSConfig.ACCESS_KEY]
     self._endpoint = env[ODPSConfig.ENDPOINT]
     self._test_read_table = "chicago_taxi_train_data"

     timestamp = int(time.time())
     nonce = random.randint(1, 101)
     self._test_write_table = "test_odps_writer_%d_%d" % (timestamp, nonce)

     self._odps_client = ODPS(self._access_id, self._access_key,
                              self._project, self._endpoint)
コード例 #17
0
    def get_connection(self, db_name=None):
        """Return the cached ODPS connection, creating it on first use.

        Args:
            db_name: Project to connect to; when falsy,
                ``self.instance.db_name`` is used instead.

        Raises:
            ValueError: If no project name is available from either source.
        """
        if not self.conn:
            project = db_name or self.instance.db_name
            if project is None:
                raise ValueError("db_name不能为空")
            self.conn = ODPS(self.user,
                             self.password,
                             project=project,
                             endpoint=self.host)
        return self.conn
コード例 #18
0
    def __init__(
        self,
        project,
        access_id,
        access_key,
        endpoint,
        table,
        partition=None,
        num_processes=None,
        options=None,
        transform_fn=None,
        columns=None,
    ):
        """
        Construct an `ODPSReader` and resolve the ODPS table it reads.

        Args:
            project: Name of the ODPS project. Replaced by the prefix of
                `table` when `table` is given as `project.table`.
            access_id: ODPS user access ID.
            access_key: ODPS user access key.
            endpoint: ODPS cluster endpoint.
            table: ODPS table name, optionally prefixed with `project.`.
            partition: ODPS table's partition.
            num_processes: Number of parallel processes on this worker
                (stored as given).
            options: Other options handed to `_configure_odps_options`;
                `None` is treated as an empty dict.
            transform_fn: Customized transform function.
            columns: List of table column names.
        """
        super(ODPSReader, self).__init__()

        # A dot after the first character means "project.table".
        if table.find(".") > 0:
            project, table = table.split(".")
        options = {} if options is None else options

        self._project = project
        self._access_id = access_id
        self._access_key = access_key
        self._endpoint = endpoint
        self._table = table
        self._partition = partition
        self._num_processes = num_processes

        _configure_odps_options(self._endpoint, options)
        odps_client = ODPS(
            self._access_id,
            self._access_key,
            self._project,
            self._endpoint,
        )
        self._odps_table = odps_client.get_table(self._table)

        self._transform_fn = transform_fn
        self._columns = columns
コード例 #19
0
ファイル: pull_dw_df.py プロジェクト: TeslaHou/Export_pdf
def connect(access_id=Config.ODPS_COMMON.access_id,
            secret_access_key=Config.ODPS_COMMON.secret_access_key,
            project=Config.ODPS_COMMON.project):
    '''
    Return the shared ODPS client, creating and caching it on first call.

    The client is stored in ``Config.CONNECT_DICT`` under the key ``'odps'``;
    once it exists, the arguments of later calls are ignored.

    param access_id : access id used for the connection   type : str
    param secret_access_key : access key used for the connection  type : str
    param project : name of the project to select    type : str
    '''
    cache = Config.CONNECT_DICT
    if 'odps' not in cache:
        cache['odps'] = ODPS(access_id, secret_access_key, project)
    return cache['odps']
コード例 #20
0
    def _set_odps(self):
        """Populate ``self._odps`` unless it is already set.

        Uses the global ``options`` when access id, access key and default
        project are all configured; otherwise falls back to the client of
        ``enter()``.
        """
        if self._odps is not None:
            return

        if options.access_id is None or \
                options.access_key is None or \
                options.default_project is None:
            # Global options are incomplete.
            self._odps = enter().odps
            return

        self._odps = ODPS(
            options.access_id,
            options.access_key,
            options.default_project,
            endpoint=options.end_point,
            tunnel_endpoint=options.tunnel_endpoint,
        )
コード例 #21
0
ファイル: maxcompute.py プロジェクト: zlb1028/sqlflow
    def connect(database, user, password, host):
        """
        Build an ODPS client for a MaxCompute project.

        Args:
            database: the MaxCompute project name.
            user: the MaxCompute AK.
            password: the MaxCompute SK.
            host: the MaxCompute endpoint address.

        Returns:
            An ``ODPS`` object bound to the given project and endpoint.
        """
        connection = ODPS(user, password, project=database, endpoint=host)
        return connection
コード例 #22
0
ファイル: download_infos.py プロジェクト: P79N6A/AliGraph
def download_infos(tablename, storename, keys):
    """Download selected columns of an ODPS table into a tab-separated file.

    Args:
        tablename: Name of the table to download through the table tunnel.
        storename: Path of the output file; it is created or truncated.
        keys: Column names to read from each record, in output order.
    """
    # NOTE(review): access id/key are hard-coded in source — they should be
    # loaded from configuration and these credentials rotated.
    o = ODPS("LTAIWt3hG5GvYBhX", "RriedkAIENmPvXvRmQcy9wRqOYx3QV", 'graph_embedding_intern_dev',
             endpoint='http://service-corp.odps.aliyun-inc.com/api')

    # The output file is managed by ``with`` so it is flushed and closed even
    # if the download fails; previously the handle was never closed.
    with open(storename, mode='w') as csv_file:
        writer = csv.writer(csv_file, delimiter='\t')

        tunnel = TableTunnel(o)
        download_session = tunnel.create_download_session(tablename)
        with download_session.open_record_reader(0, download_session.count) as reader:
            for record in reader:
                writer.writerow([record[key] for key in keys])
    print("complete storing {}".format(storename))
コード例 #23
0
 def get_odps_conn(self):
     """
     Connect to ODPS using the ``ODPS`` section of ``self.config_data``.

     Any failure while building the client is logged with its traceback and
     then re-raised.
     :return: the ODPS client
     """
     settings = self.config_data.get('ODPS')
     try:
         return ODPS(access_id=settings['USER'],
                     secret_access_key=settings['PASSWD'],
                     project=settings['DBNAME'],
                     endpoint=settings['URL'])
     except:
         self.logging.error(traceback.format_exc())
         raise
コード例 #24
0
    def _execute_arrow_tunnel(cls, ctx, op):
        """Read a row range of an ODPS table through the Arrow tunnel.

        The project comes from ``ODPS_PROJECT_NAME`` when set (otherwise from
        ``op.odps_params``) and the endpoint from ``ODPS_RUNTIME_ENDPOINT``
        when set (otherwise from ``op.odps_params``). Starting at
        ``op.start_index``, ``op.nrows`` rows are read, or
        ``op.end_index - op.start_index`` rows when ``op.nrows`` is ``None``.
        Partition values are appended, strings are optionally cast to binary,
        and the result is converted to a pandas dataframe aligned to the
        output dtypes and stored under the first output's key.
        """
        from odps import ODPS
        from odps.tunnel import TableTunnel

        env_project = os.environ.get('ODPS_PROJECT_NAME')
        params = op.odps_params.copy()
        if env_project:
            params['project'] = env_project
        endpoint = (os.environ.get('ODPS_RUNTIME_ENDPOINT')
                    or params['endpoint'])
        odps_client = ODPS(params['access_id'],
                           params['secret_access_key'],
                           project=params['project'],
                           endpoint=endpoint)

        source_table = odps_client.get_table(op.table_name)
        tunnel = TableTunnel(odps_client, project=source_table.project)

        # Only pass ``partition_spec`` when the op actually has one.
        session_kwargs = {}
        if op.partition_spec is not None:
            session_kwargs['partition_spec'] = op.partition_spec
        download_session = tunnel.create_download_session(
            source_table.name, **session_kwargs)
        logger.debug('Start reading table %s(%s) split from %s to %s',
                     op.table_name, op.partition_spec, op.start_index,
                     op.end_index)
        count = op.end_index - op.start_index if op.nrows is None else op.nrows

        with download_session.open_arrow_reader(op.start_index,
                                                count,
                                                columns=op.columns) as reader:
            arrow_table = reader.read()

        arrow_table = cls._append_partition_values(arrow_table, op)
        if op.string_as_binary:
            arrow_table = cls._cast_string_to_binary(arrow_table)
        frame = arrow_table_to_pandas_dataframe(
            arrow_table, use_arrow_dtype=op.use_arrow_dtype)

        frame = cls._align_columns(frame, op.outputs[0].dtypes)

        logger.debug('Finish reading table %s(%s) split from %s to %s',
                     op.table_name, op.partition_spec, op.start_index,
                     op.end_index)
        ctx[op.outputs[0].key] = frame
コード例 #25
0
 def setUp(self):
     """Read ODPS settings, build the client and create the iris read table.

     Project, access id and access key must be present in the environment;
     the endpoint is optional. The read and write table names are each made
     unique by a timestamp and a random number.
     """
     env = os.environ
     self._project = env[ODPSConfig.PROJECT_NAME]
     self._access_id = env[ODPSConfig.ACCESS_ID]
     self._access_key = env[ODPSConfig.ACCESS_KEY]
     self._endpoint = env.get(ODPSConfig.ENDPOINT)

     read_suffix = (int(time.time()), random.randint(1, 101))
     self._test_read_table = "test_odps_reader_%d_%d" % read_suffix
     write_suffix = (int(time.time()), random.randint(1, 101))
     self._test_write_table = "test_odps_writer_%d_%d" % write_suffix

     self._odps_client = ODPS(self._access_id, self._access_key,
                              self._project, self._endpoint)
     self.create_iris_odps_table()
    def createTimeButtons(self, event=None):
        """Run the buyer query for the current UI selection and save it as CSV.

        Reads the two calendar dates, the category-1 combo box, the two
        category list boxes and the file-name field, prints them, builds a
        SQL query from those values, executes it on ODPS and writes the
        result (with upper-cased column names) to
        ``"<YYYYMMDD> <file_name>.csv"``.

        Args:
            event: Not used by the body.
        """
        # Collect the current selections from the widgets.
        start_date_select = self.cal1.selection_get().strftime("%Y%m%d")
        end_date_select = self.cal2.selection_get().strftime("%Y%m%d")
        cat1_select = self.genreCombo.get()
        cat2_select = [
            self.Listbox1.get(v) for v in self.Listbox1.curselection()
        ]
        cat3_select = [
            self.Listbox2.get(a) for a in self.Listbox2.curselection()
        ]
        file_name = str(self.file_name.get())

        print(start_date_select)
        print(end_date_select)
        print(cat1_select)
        print(cat2_select)
        print(cat3_select)
        print(file_name)

        # ODPS client setup. Credentials, project, endpoint and tunnel URL
        # are blank/placeholder values in this source.

        o = ODPS('', '', '', endpoint='')
        options.tunnel.endpoint = '**API URL**'
        # NOTE(review): the widget values are concatenated straight into the
        # SQL text, so a value containing a single quote changes the query —
        # confirm the inputs are trusted or escape them.
        query = """SELECT  DISTINCT buyer_id
                        ,email_address
                        ,phone_number
                        ,venture_category1_name_en
                        ,venture_category2_name_en
                        ,venture_category3_name_en
                  FROM   Table Name AS t
                  WHERE  venture = ""
                  AND    order_status_esm NOT IN ('invalid')
                  AND    category1_name_en in ('""" + cat1_select + """')
                  AND    category2_name_en in (""" + "'" + "','".join(
            cat2_select) + "'" + """)
                  AND    category3_name_en in (""" + "'" + "','".join(
                cat3_select) + "'" + """)
                  AND    TO_CHAR(t.order_create_date,'yyyymmdd') BETWEEN '""" + start_date_select + """'
                  AND    '""" + end_date_select + """'
                  """
        # Execute the query and load the result into a pandas dataframe.
        df = o.execute_sql(query).open_reader().to_result_frame().to_pandas()
        df.columns = df.columns.str.upper()
        # Output file name: today's date, a space, then the user-entered name.
        date_string = arrow.now().format('YYYYMMDD') + ' ' + file_name
        df.to_csv(str(str(date_string) + '.csv'), index=False)
        print(query)
コード例 #27
0
def _handle_commit_table_upload_session(sock):
    """Handle a "commit table upload session" request read from ``sock``.

    The request is a 4-byte little-endian unsigned length followed by a
    pickled dict with ``odps_params``, ``table_name``, ``cupid_handle``,
    ``blocks`` and ``overwrite``. The upload session identified by the handle
    is committed with an ODPS client authenticated by the Cupid bearer token.
    Success and failure are both reported through ``_write_request_result``.
    """
    try:
        body_size = struct.unpack('<I', sock.recv(4))[0]
        # dict with odps_params, table_name, cupid_handle, blocks, overwrite
        config = pickle.loads(sock.recv(body_size))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext
        from cupid.io.table import CupidTableUploadSession

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_ctx = context()

        params = config['odps_params']
        account = BearerTokenAccount(cupid_ctx.get_bearer_token())
        # Environment values win; the request's params are the fallback.
        project = (os.environ.get('ODPS_PROJECT_NAME')
                   or params['project'])
        endpoint = (os.environ.get('ODPS_RUNTIME_ENDPOINT')
                    or params['endpoint'])
        odps_client = ODPS(None,
                           None,
                           account=account,
                           project=project,
                           endpoint=endpoint)
        cupid_session = CupidSession(odps_client)

        # ``table_name`` is expected in ``project.table`` form.
        project_name, table_name = config['table_name'].split('.')
        session_kwargs = dict(session=cupid_session,
                              table_name=table_name,
                              project_name=project_name,
                              handle=config['cupid_handle'],
                              blocks=config['blocks'])
        upload_session = CupidTableUploadSession(**session_kwargs)
        upload_session.commit(overwrite=config['overwrite'])

        _write_request_result(sock)
    except:
        logger.exception('Failed to commit upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
コード例 #28
0
def load_ipython_extension(ipython):
    """Register the ODPS magics with an IPython shell.

    Connection settings are collected from the environment variables
    ``AccessKeyId``, ``AccessKeySecret``, ``Project`` and ``Endpoint``, and
    then from ``~/.aliyun_profile`` (one ``key=value`` pair per line) when
    that file exists. Values from the file override the environment.

    Args:
        ipython: The IPython shell; it is passed to ``ODPSMagic`` and the
            resulting magics object is registered on it.

    Raises:
        ValueError: If a non-blank profile line has no ``=``, or if any of
            the four settings is still missing after both sources were read.
    """
    required = ("AccessKeyId", "AccessKeySecret", "Project", "Endpoint")
    params = {}
    for key in required:
        val = os.getenv(key)
        if val:
            params[key] = val

    path = os.path.join(os.path.expanduser("~"), ".aliyun_profile")
    if os.path.exists(path):
        with open(path) as f:
            for line in f:
                # Stripping also removes a trailing "\r" from CRLF files.
                line = line.strip()
                if not line:
                    continue
                key, sep, param = line.partition("=")
                if not sep:
                    raise ValueError(
                        "Invalid line in %s: %r" % (path, line))
                params[key.strip()] = param.strip()

    # Validate after both sources so an incomplete profile file produces a
    # clear error instead of a KeyError further down.
    missing = [key for key in required if key not in params]
    if missing:
        raise ValueError(
            "Not Set enough param, missing: " + ", ".join(missing))

    myodps = ODPS(params["AccessKeyId"], params['AccessKeySecret'], params['Project'], endpoint=params['Endpoint'])
    magic = ODPSMagic(ipython, myodps)
    ipython.register_magics(magic)
コード例 #29
0
def _handle_enum_table_partitions(sock):
    """Answer a request on ``sock`` with the partitions of an ODPS table.

    The request is a 4-byte little-endian unsigned length followed by that
    many bytes holding a pickled dict with the keys ``odps_params``,
    ``table_name`` and, optionally, ``partition``.

    The reply is sent through ``_write_request_result``:

    * ``result=None`` when the table schema has no partition columns;
    * ``result=[partition]`` when a partition was requested and
      ``check_partition_exist`` accepts it;
    * the string specs of whatever ``filter_partitions`` selects when a
      partition was requested but does not exist as given;
    * the string specs of all table partitions when no partition was
      requested.

    When anything in the handler raises, the failure is logged and reported
    with ``False`` and the active ``exc_info``.

    Args:
        sock: Connected socket-like object supporting ``recv(size)``.
    """
    def _recv_exact(size):
        # ``recv`` may return fewer bytes than requested on a stream socket,
        # so keep reading until the whole frame has arrived.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = sock.recv(remaining)
            if not chunk:
                raise EOFError(
                    'Connection closed with %d of %d bytes unread'
                    % (remaining, size))
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    try:
        cmd_len, = struct.unpack('<I', _recv_exact(4))
        # dict with odps_params, table_name, partition
        # NOTE(review): pickle.loads executes arbitrary code from its input;
        # confirm this socket is only reachable by trusted peers.
        task_config = pickle.loads(_recv_exact(cmd_len))

        # Imported lazily so that an unavailable dependency is reported to
        # the requester like any other failure.
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import context

        cupid_ctx = context()

        odps_params = task_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        # Environment settings win; the request's odps_params are the fallback.
        project = os.environ.get('ODPS_PROJECT_NAME') or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=project,
                 endpoint=endpoint)

        table = o.get_table(task_config['table_name'])
        partition_desc = task_config.get('partition')
        if not table.schema.partitions:
            _write_request_result(sock, result=None)
        elif partition_desc:
            if check_partition_exist(table, partition_desc):
                _write_request_result(sock, result=[partition_desc])
            else:
                parts = filter_partitions(o, list(table.partitions),
                                          partition_desc)
                _write_request_result(
                    sock, result=[str(pt.partition_spec) for pt in parts])
        else:
            _write_request_result(
                sock,
                result=[str(pt.partition_spec) for pt in table.partitions])
    except:  # noqa: E722
        # Deliberately broad: every failure is sent back over the socket
        # (presumably the requester is waiting for a reply -- confirm).
        logger.exception('Failed to enumerate table partitions')
        _write_request_result(sock, False, exc_info=sys.exc_info())
コード例 #30
0
ファイル: odps_io.py プロジェクト: yupbank/elasticdl
    def __init__(
        self,
        project,
        access_id,
        access_key,
        endpoint,
        table,
        columns=None,
        column_types=None,
        options=None,
    ):
        """
        Create an `ODPSWriter` bound to one ODPS table.

        Args:
            project: Name of the ODPS project. It is replaced by the
                project part of `table` when `table` is given in
                "project.table" form.
            access_id: ODPS user access ID.
            access_key: ODPS user access key.
            endpoint: ODPS cluster endpoint.
            table: ODPS table name, optionally prefixed with
                "<project>.".
            columns: The list of column names in the table,
                which will be inferred if the table exists.
            column_types: The list of column types in the table,
                which will be inferred if the table exists.
            options: Other options passed to the ODPS context;
                `None` is treated as an empty dict.
        """
        super(ODPSWriter, self).__init__()

        # A dot anywhere after the first character marks a qualified
        # "project.table" name; the embedded project then wins.
        dot_index = table.find(".")
        if dot_index > 0:
            project, table = table.split(".")
        odps_options = {} if options is None else options

        # Connection settings.
        self._access_id = access_id
        self._access_key = access_key
        self._endpoint = endpoint
        self._project = project

        # Target table description; the table object itself starts unset.
        self._table = table
        self._columns = columns
        self._column_types = column_types
        self._odps_table = None

        _configure_odps_options(endpoint, odps_options)
        self._odps_client = ODPS(access_id, access_key, project, endpoint)