def testTokenizedSignServerAccount(self): server = SignServer(token=str(uuid.uuid4())) server.accounts[ self.odps.account.access_id] = self.odps.account.secret_access_key try: server.start(('127.0.0.1', 0)) account = SignServerAccount(self.odps.account.access_id, server.server.server_address) odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=account) self.assertRaises( SignServerError, lambda: odps.delete_table(tn('test_sign_account_table'), if_exists=True)) account = SignServerAccount(self.odps.account.access_id, server.server.server_address, token=server.token) odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=account) odps.delete_table(tn('test_sign_account_table'), if_exists=True) t = odps.create_table(tn('test_sign_account_table'), 'col string', lifecycle=1) self.assertTrue(odps.exist_table(tn('test_sign_account_table'))) t.drop(async=True) finally: server.stop()
def testBearerTokenAccount(self): self.odps.delete_table(tn('test_bearer_token_account_table'), if_exists=True) t = self.odps.create_table(tn('test_bearer_token_account_table'), 'col string', lifecycle=1) with t.open_writer() as writer: records = [['val1'], ['val2'], ['val3']] writer.write(records) inst = self.odps.execute_sql('select count(*) from {0}'.format( tn('test_bearer_token_account_table')), async_=True) inst.wait_for_success() task_name = inst.get_task_names()[0] logview_address = inst.get_logview_address() token = logview_address[logview_address.find('token=') + len('token='):] bearer_token_account = BearerTokenAccount(token=token) bearer_token_odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=bearer_token_account) bearer_token_instance = bearer_token_odps.get_instance(inst.id) self.assertEqual(inst.get_task_result(task_name), bearer_token_instance.get_task_result(task_name)) self.assertEqual(inst.get_task_summary(task_name), bearer_token_instance.get_task_summary(task_name)) with self.assertRaises(errors.NoPermission): bearer_token_odps.create_table( tn('test_bearer_token_account_table_test1'), 'col string', lifecycle=1) fake_token_account = BearerTokenAccount(token='fake-token') bearer_token_odps = ODPS(None, None, self.odps.project, self.odps.endpoint, account=fake_token_account) with self.assertRaises(errors.ODPSError): bearer_token_odps.create_table( tn('test_bearer_token_account_table_test2'), 'col string', lifecycle=1)
def check_instance_idle(self): last_active_time = self._last_activity_time has_running = False for ref in self._session_refs.values(): for info in ref.get_graph_infos().values(): if info.get('end_time') is None: has_running = True break else: last_active_time = max(info['end_time'], last_active_time) if has_running: break if not has_running and last_active_time < time.time( ) - self._idle_timeout: # timeout: we need to kill the instance from odps import ODPS from odps.accounts import BearerTokenAccount from cupid.runtime import context logger.warning('Timeout met, killing the instance now.') bearer_token = context().get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ['ODPS_PROJECT_NAME'] endpoint = os.environ['ODPS_RUNTIME_ENDPOINT'] o = ODPS(None, None, account=account, project=project, endpoint=endpoint) o.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE']) else: self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
def load2excel(filepath, begin, end): o = ODPS(ACCESS_KEY_ID, ACCESS_KEY_SECRET, PROJECT, endpoint='http://service.odps.aliyun.com/api') print(o) sql = "select * from stat_table where ds>={begin} and ds<={end}".format(begin=begin, end=end) with o.execute_sql(sql).open_reader() as reader: print(sql) print(reader.count) table_title = reader._schema.names print(table_title) workbook = openpyxl.Workbook() worksheet = workbook.create_sheet('Sheet') for i in range(len(table_title)): worksheet.cell(1, i + 1, table_title[i]) write_count = 2 for r in reader: for i in range(len(r.values)): # 10个依次写入当前行 worksheet.cell(write_count, i + 1, r.values[i]) write_count = write_count + 1 workbook.save(filename=filepath)
def _handle_terminate_instance(sock): from cupid.runtime import context, RuntimeContext from odps import ODPS from odps.accounts import BearerTokenAccount try: cmd_len, = struct.unpack('<I', sock.recv(4)) # dict with key cmd_body = pickle.loads(sock.recv(cmd_len)) instance_id = cmd_body['instance_id'] if not RuntimeContext.is_context_ready(): logger.warning('Cupid context not ready') else: bearer_token = context().get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ['ODPS_PROJECT_NAME'] endpoint = os.environ['ODPS_RUNTIME_ENDPOINT'] o = ODPS(None, None, account=account, project=project, endpoint=endpoint) o.stop_instance(instance_id) except: logger.exception('Failed to put kv value') _write_request_result(sock, False, exc_info=sys.exc_info())
def setUp(self): self.project = os.environ[MaxComputeConfig.PROJECT_NAME] access_id = os.environ[MaxComputeConfig.ACCESS_ID] access_key = os.environ[MaxComputeConfig.ACCESS_KEY] endpoint = os.environ.get(MaxComputeConfig.ENDPOINT) tunnel_endpoint = os.environ.get(MaxComputeConfig.TUNNEL_ENDPOINT, None) self.test_table = "test_odps_data_reader_%d_%d" % ( int(time.time()), random.randint(1, 101), ) self.odps_client = ODPS(access_id, access_key, self.project, endpoint) create_iris_odps_table(self.odps_client, self.project, self.test_table) self.records_per_task = 50 self.reader = ODPSDataReader( project=self.project, access_id=access_id, access_key=access_key, endpoint=endpoint, table=self.test_table, tunnel_endpoint=tunnel_endpoint, num_processes=1, records_per_task=self.records_per_task, )
def check_instance_idle(self): from cupid.runtime import context has_running, active_time_from_service = self._get_service_activity_info( ) if active_time_from_service != self._last_active_time_from_service: self._last_active_time = active_time_from_service self._last_active_time_from_service = active_time_from_service elif has_running: self._last_active_time = time.time() if self._last_active_time < time.time() - self._idle_timeout: # timeout: we need to kill the instance from odps import ODPS from odps.accounts import BearerTokenAccount logger.warning('Timeout met, killing the instance now.') bearer_token = context().get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ['ODPS_PROJECT_NAME'] endpoint = os.environ['ODPS_RUNTIME_ENDPOINT'] o = ODPS(None, None, account=account, project=project, endpoint=endpoint) o.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE']) else: kv_store = context().kv_store() kv_store[CUPID_LAST_IDLE_TIME_KEY] = str(self._last_active_time) self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
def execute(cls, ctx, op): import pandas as pd from odps import ODPS from odps.accounts import BearerTokenAccount from cupid import CupidSession, context from cupid.io.table import CupidTableUploadSession if op.is_terminal: bearer_token = context().get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ.get('ODPS_PROJECT_NAME', None) odps_params = op.odps_params.copy() if project: odps_params['project'] = project endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(None, None, account=account, project=odps_params['project'], endpoint=endpoint) cupid_session = CupidSession(o) project_name, table_name = op.table_name.split('.') upload_session = CupidTableUploadSession(session=cupid_session, table_name=table_name, project_name=project_name, handle=op.cupid_handle, blocks=op.blocks) upload_session.commit(overwrite=op.overwrite) ctx[op.outputs[0].key] = pd.DataFrame()
def _execute_in_cupid(cls, ctx, op): import os import pandas as pd from odps import ODPS from odps.accounts import BearerTokenAccount cupid_client = CupidServiceClient() to_store_data = ctx[op.inputs[0].key] bearer_token = cupid_client.get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ.get('ODPS_PROJECT_NAME', None) odps_params = op.odps_params.copy() if project: odps_params['project'] = project endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(None, None, account=account, project=odps_params['project'], endpoint=endpoint) odps_schema = o.get_table(op.table_name).schema project_name, table_name = op.table_name.split('.') writer_config = dict(_table_name=table_name, _project_name=project_name, _table_schema=odps_schema, _partition_spec=op.partition_spec, _block_id=op.block_id, _handle=op.cupid_handle) cupid_client.write_table_data(writer_config, to_store_data, op.write_batch_size) ctx[op.outputs[0].key] = pd.DataFrame()
def get_lt_etid(): logging.info('12. 正在获取其他的企业 ') print utils.current_time(), '建立odps链接..' o = ODPS('LTAIzEuNzcL6qJJ8', 'eUAgj9ijhWCvOQ3w5Uv3FkwhNxvPF2', 'database_test', 'http://service.odps.aliyun.com/api') print utils.current_time(), '进行查询...' pt = time.strftime('%Y%m%d', time.localtime(int(time.time() - 86400))) res = o.execute_sql( "select distinct etid from et_jobs where pt='{}' and isheadhunter=1". format(pt)) print utils.current_time(), '处理查询结果...' etid_set = set() conn = utils.get_local_db() addtime = int(time.time()) cnt = 0 with res.open_reader() as reader: print utils.current_time(), '共需处理{}条!'.format(reader.count) for record in reader: etid_set.add((record['etid'], )) if len(etid_set) >= 1000: conn.executemany( "insert into et_info_status(etid,addtime) values(%s,{})on duplicate key update etid=values(etid), addtime=values(addtime)" .format(addtime), list(etid_set)) cnt += 1000 print utils.current_time(), '当前已写入{}条!'.format(cnt) etid_set.clear() if len(etid_set) > 0: conn.executemany( "insert into et_info_status(etid,addtime) values(%s,{})on duplicate key update etid=values(etid), addtime=values(addtime)" .format(addtime), list(etid_set)) cnt += len(etid_set) print utils.current_time(), '当前已写入{}条!'.format(cnt) conn.close() return reader.count
def __init__(self, conn_uri): super(MaxComputeConnection, self).__init__(conn_uri) user, pwd, endpoint, proj = MaxComputeConnection.get_uri_parts( conn_uri) self.driver = "maxcompute" self.params["database"] = proj self.endpoint = endpoint self._conn = ODPS(user, pwd, project=proj, endpoint=endpoint)
def tile(cls, op): from odps import ODPS from odps.accounts import BearerTokenAccount from cupid import CupidSession, context bearer_token = context().get_bearer_token() account = BearerTokenAccount(bearer_token) o = ODPS(None, None, account=account, **op.odps_params) cupid_session = CupidSession(o) data_src = o.get_table(op.table_name) logger.debug('Start creating upload session from cupid.') upload_session = cupid_session.create_upload_session(data_src) input_df = op.inputs[0] out_chunks = [] out_chunk_shape = (0,) * len(input_df.shape) blocks = {} for chunk in input_df.chunks: block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace('-', '') chunk_op = DataFrameWriteTableSplit(dtypes=op.dtypes, table_name=op.table_name, partition_spec=op.partition_spec, cupid_handle=to_str(upload_session.handle), block_id=block_id, write_batch_size=op.write_batch_size) out_chunk = chunk_op.new_chunk([chunk], shape=out_chunk_shape, index=chunk.index, dtypes=chunk.dtypes) out_chunks.append(out_chunk) blocks[block_id] = op.partition_spec # build commit tree combine_size = 8 chunks = out_chunks while len(chunks) > combine_size: new_chunks = [] for i in range(0, len(chunks), combine_size): chks = chunks[i: i + combine_size] if len(chks) == 1: chk = chks[0] else: chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes, is_terminal=False) chk = chk_op.new_chunk(chks, shape=out_chunk_shape, dtypes=op.dtypes) new_chunks.append(chk) chunks = new_chunks assert len(chunks) < combine_size commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes, table_name=op.table_name, blocks=blocks, cupid_handle=to_str(upload_session.handle), overwrite=op.overwrite, odps_params=op.odps_params, is_terminal=True) commit_table_chunk = commit_table_op.new_chunk(chunks, shape=out_chunk_shape, dtypes=op.dtypes) out_df = op.outputs[0] new_op = op.copy() return new_op.new_dataframes(op.inputs, shape=out_df.shape, dtypes=out_df.dtypes, chunks=[commit_table_chunk], nsplits=((0,),) * len(out_chunk_shape))
def _get_table_schema(self): odps_client = ODPS( access_id=self._kwargs["access_id"], secret_access_key=self._kwargs["access_key"], project=self._kwargs["project"], endpoint=self._kwargs.get("endpoint"), ) odps_table = odps_client.get_table(self._kwargs["table"]) return odps_table.schema
def _execute_arrow_tunnel(cls, ctx, op): from odps import ODPS from odps.tunnel import TableTunnel import pyarrow as pa import pandas as pd project = os.environ.get('ODPS_PROJECT_NAME', None) odps_params = op.odps_params.copy() if project: odps_params['project'] = project endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(odps_params['access_id'], odps_params['secret_access_key'], project=odps_params['project'], endpoint=endpoint) t = o.get_table(op.table_name) tunnel = TableTunnel(o, project=t.project) retry_times = options.retry_times retries = 0 while True: try: if op.partition_spec is not None: upload_session = tunnel.create_upload_session( t.name, partition_spec=op.partition_spec) else: upload_session = tunnel.create_upload_session(t.name) break except: if retries >= retry_times: raise time.sleep(1) logger.debug('Start writing table %s split index: %s', op.table_name, op.inputs[0].index) retries = 0 while True: try: writer = upload_session.open_arrow_writer(0) arrow_rb = pa.RecordBatch.from_pandas(ctx[op.inputs[0].key]) writer.write(arrow_rb) writer.close() break except: if retries >= retry_times: raise time.sleep(1) upload_session.commit([0]) logger.debug('Finish writing table %s split index: %s', op.table_name, op.inputs[0].index) ctx[op.outputs[0].key] = pd.DataFrame()
def __init__(self, conn_uri): super().__init__(conn_uri) self.params["database"] = self.params["curr_project"] # compose an endpoint, only keep the host and path and replace scheme endpoint = self.uripts._replace(scheme=self.params["scheme"], query="", netloc=self.uripts.hostname) self._conn = ODPS(self.uripts.username, self.uripts.password, project=self.params["database"], endpoint=endpoint.geturl())
def setUp(self): self._project = os.environ[ODPSConfig.PROJECT_NAME] self._access_id = os.environ[ODPSConfig.ACCESS_ID] self._access_key = os.environ[ODPSConfig.ACCESS_KEY] self._endpoint = os.environ[ODPSConfig.ENDPOINT] self._test_read_table = "chicago_taxi_train_data" self._test_write_table = "test_odps_writer_%d_%d" % ( int(time.time()), random.randint(1, 101), ) self._odps_client = ODPS(self._access_id, self._access_key, self._project, self._endpoint)
def get_connection(self, db_name=None): if self.conn: return self.conn db_name = db_name if db_name else self.instance.db_name if db_name is None: raise ValueError("db_name不能为空") self.conn = ODPS(self.user, self.password, project=db_name, endpoint=self.host) return self.conn
def __init__( self, project, access_id, access_key, endpoint, table, partition=None, num_processes=None, options=None, transform_fn=None, columns=None, ): """ Constructs a `ODPSReader` instance. Args: project: Name of the ODPS project. access_id: ODPS user access ID. access_key: ODPS user access key. endpoint: ODPS cluster endpoint. table: ODPS table name. tunnel_endpoint: ODPS tunnel endpoint. partition: ODPS table's partition. options: Other options passed to ODPS context. num_processes: Number of parallel processes on this worker. If `None`, use the number of cores. transform_fn: Customized transfrom function columns: list of table column names """ super(ODPSReader, self).__init__() if table.find(".") > 0: project, table = table.split(".") if options is None: options = {} self._project = project self._access_id = access_id self._access_key = access_key self._endpoint = endpoint self._table = table self._partition = partition self._num_processes = num_processes _configure_odps_options(self._endpoint, options) self._odps_table = ODPS( self._access_id, self._access_key, self._project, self._endpoint, ).get_table(self._table) self._transform_fn = transform_fn self._columns = columns
def connect(access_id=Config.ODPS_COMMON.access_id, secret_access_key=Config.ODPS_COMMON.secret_access_key, project=Config.ODPS_COMMON.project): ''' 实例化odps并返回该对象 param access_id : 连接 datastudio的id type : str param secret_access_key : 连接 datastudio的key type : str param project : 选择的项目名称 type : str ''' if not 'odps' in Config.CONNECT_DICT.keys(): Config.CONNECT_DICT['odps'] = ODPS(access_id, secret_access_key, project) return Config.CONNECT_DICT['odps']
def _set_odps(self): if self._odps is not None: return if options.access_id is not None and \ options.access_key is not None and \ options.default_project is not None: self._odps = ODPS( options.access_id, options.access_key, options.default_project, endpoint=options.end_point, tunnel_endpoint=options.tunnel_endpoint ) else: self._odps = enter().odps
def connect(database, user, password, host): """ Create a MaxCompute database connection object. Args: database: the MaxCompute project name. user: the MaxCompute AK. password: the MaxCompute SK. host: the MaxCompute endpoint address. Returns: A MaxCompute database connection object. """ return ODPS(user, password, project=database, endpoint=host)
def download_infos(tablename, storename, keys): o = ODPS("LTAIWt3hG5GvYBhX", "RriedkAIENmPvXvRmQcy9wRqOYx3QV", 'graph_embedding_intern_dev', endpoint='http://service-corp.odps.aliyun-inc.com/api') project = o.get_project() csv_file = open(storename, mode='w') writer = csv.writer(csv_file, delimiter='\t') tunnel = TableTunnel(o) download_session = tunnel.create_download_session(tablename) with download_session.open_record_reader(0, download_session.count) as reader: for record in reader: info = [record[key] for key in keys] writer.writerow(info) print("complete storing {}".format(storename))
def get_odps_conn(self): """ 连接ODPS :return: """ odps_config = self.config_data.get('ODPS') try: conn = ODPS(access_id=odps_config['USER'], secret_access_key=odps_config['PASSWD'], project=odps_config['DBNAME'], endpoint=odps_config['URL']) except: self.logging.error(traceback.format_exc()) raise return conn
def _execute_arrow_tunnel(cls, ctx, op): from odps import ODPS from odps.tunnel import TableTunnel project = os.environ.get('ODPS_PROJECT_NAME', None) odps_params = op.odps_params.copy() if project: odps_params['project'] = project endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(odps_params['access_id'], odps_params['secret_access_key'], project=odps_params['project'], endpoint=endpoint) t = o.get_table(op.table_name) tunnel = TableTunnel(o, project=t.project) if op.partition_spec is not None: download_session = tunnel.create_download_session( t.name, partition_spec=op.partition_spec) else: download_session = tunnel.create_download_session(t.name) logger.debug('Start reading table %s(%s) split from %s to %s', op.table_name, op.partition_spec, op.start_index, op.end_index) if op.nrows is None: count = op.end_index - op.start_index else: count = op.nrows with download_session.open_arrow_reader(op.start_index, count, columns=op.columns) as reader: table = reader.read() table = cls._append_partition_values(table, op) if op.string_as_binary: table = cls._cast_string_to_binary(table) data = arrow_table_to_pandas_dataframe( table, use_arrow_dtype=op.use_arrow_dtype) data = cls._align_columns(data, op.outputs[0].dtypes) logger.debug('Finish reading table %s(%s) split from %s to %s', op.table_name, op.partition_spec, op.start_index, op.end_index) ctx[op.outputs[0].key] = data
def setUp(self): self._project = os.environ[ODPSConfig.PROJECT_NAME] self._access_id = os.environ[ODPSConfig.ACCESS_ID] self._access_key = os.environ[ODPSConfig.ACCESS_KEY] self._endpoint = os.environ.get(ODPSConfig.ENDPOINT) self._test_read_table = "test_odps_reader_%d_%d" % ( int(time.time()), random.randint(1, 101), ) self._test_write_table = "test_odps_writer_%d_%d" % ( int(time.time()), random.randint(1, 101), ) self._odps_client = ODPS(self._access_id, self._access_key, self._project, self._endpoint) self.create_iris_odps_table()
def createTimeButtons(self, event=None): start_date_select = self.cal1.selection_get().strftime("%Y%m%d") end_date_select = self.cal2.selection_get().strftime("%Y%m%d") cat1_select = self.genreCombo.get() cat2_select = [ self.Listbox1.get(v) for v in self.Listbox1.curselection() ] cat3_select = [ self.Listbox2.get(a) for a in self.Listbox2.curselection() ] file_name = str(self.file_name.get()) print(start_date_select) print(end_date_select) print(cat1_select) print(cat2_select) print(cat3_select) print(file_name) # Web Hooks o = ODPS('', '', '', endpoint='') options.tunnel.endpoint = '**API URL**' query = """SELECT DISTINCT buyer_id ,email_address ,phone_number ,venture_category1_name_en ,venture_category2_name_en ,venture_category3_name_en FROM Table Name AS t WHERE venture = "" AND order_status_esm NOT IN ('invalid') AND category1_name_en in ('""" + cat1_select + """') AND category2_name_en in (""" + "'" + "','".join( cat2_select) + "'" + """) AND category3_name_en in (""" + "'" + "','".join( cat3_select) + "'" + """) AND TO_CHAR(t.order_create_date,'yyyymmdd') BETWEEN '""" + start_date_select + """' AND '""" + end_date_select + """' """ # Executing the query and set the data into a dataframe df = o.execute_sql(query).open_reader().to_result_frame().to_pandas() df.columns = df.columns.str.upper() date_string = arrow.now().format('YYYYMMDD') + ' ' + file_name df.to_csv(str(str(date_string) + '.csv'), index=False) print(query)
def _handle_commit_table_upload_session(sock): try: cmd_len, = struct.unpack('<I', sock.recv(4)) # dict with odps_params, table_name, cupid_handle, blocks, overwrite commit_config = pickle.loads(sock.recv(cmd_len)) from odps import ODPS from odps.accounts import BearerTokenAccount from cupid import CupidSession, context from cupid.runtime import RuntimeContext from cupid.io.table import CupidTableUploadSession if not RuntimeContext.is_context_ready(): raise SystemError( 'No Mars cluster found, please create via `o.create_mars_cluster`.' ) cupid_ctx = context() odps_params = commit_config['odps_params'] bearer_token = cupid_ctx.get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ.get('ODPS_PROJECT_NAME', None) or odps_params['project'] endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(None, None, account=account, project=project, endpoint=endpoint) cupid_session = CupidSession(o) project_name, table_name = commit_config['table_name'].split('.') upload_session = CupidTableUploadSession( session=cupid_session, table_name=table_name, project_name=project_name, handle=commit_config['cupid_handle'], blocks=commit_config['blocks']) upload_session.commit(overwrite=commit_config['overwrite']) _write_request_result(sock) except: logger.exception('Failed to commit upload session') _write_request_result(sock, False, exc_info=sys.exc_info())
def load_ipython_extension(ipython): params = {} for key in ("AccessKeyId", "AccessKeySecret", "Project", "Endpoint"): val = os.getenv(key) if val: params[key] = val path = os.path.expanduser("~") + "/.aliyun_profile" if os.path.exists(path): with open(path) as f: for item in f.read().strip().split("\n"): key, param = item.split("=",1) params[key] = param elif len(params.keys()) != 4: raise ValueError("Not Set enough param") myodps = ODPS(params["AccessKeyId"], params['AccessKeySecret'], params['Project'], endpoint=params['Endpoint']) magic = ODPSMagic(ipython, myodps) ipython.register_magics(magic)
def _handle_enum_table_partitions(sock): try: cmd_len, = struct.unpack('<I', sock.recv(4)) # dict with odps_params, table_name, partition task_config = pickle.loads(sock.recv(cmd_len)) from odps import ODPS from odps.accounts import BearerTokenAccount from cupid import context cupid_ctx = context() odps_params = task_config['odps_params'] bearer_token = cupid_ctx.get_bearer_token() account = BearerTokenAccount(bearer_token) project = os.environ.get('ODPS_PROJECT_NAME', None) or odps_params['project'] endpoint = os.environ.get( 'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint'] o = ODPS(None, None, account=account, project=project, endpoint=endpoint) table = o.get_table(task_config['table_name']) partition_desc = task_config.get('partition') if not table.schema.partitions: _write_request_result(sock, result=None) elif partition_desc: if check_partition_exist(table, partition_desc): _write_request_result(sock, result=[partition_desc]) else: parts = filter_partitions(o, list(table.partitions), partition_desc) _write_request_result( sock, result=[str(pt.partition_spec) for pt in parts]) else: _write_request_result( sock, result=[str(pt.partition_spec) for pt in table.partitions]) except: logger.exception('Failed to create download session') _write_request_result(sock, False, exc_info=sys.exc_info())
def __init__( self, project, access_id, access_key, endpoint, table, columns=None, column_types=None, options=None, ): """ Constructs a `ODPSWriter` instance. Args: project: Name of the ODPS project. access_id: ODPS user access ID. access_key: ODPS user access key. endpoint: ODPS cluster endpoint. table: ODPS table name. columns: The list of column names in the table, which will be inferred if the table exits. column_types" The list of column types in the table, which will be inferred if the table exits. options: Other options passed to ODPS context. """ super(ODPSWriter, self).__init__() if table.find(".") > 0: project, table = table.split(".") if options is None: options = {} self._project = project self._access_id = access_id self._access_key = access_key self._endpoint = endpoint self._table = table self._columns = columns self._column_types = column_types self._odps_table = None _configure_odps_options(self._endpoint, options) self._odps_client = ODPS( self._access_id, self._access_key, self._project, self._endpoint )