def wrapper(ctx, op):
    """Run the wrapped callable ``f`` with ODPS credentials exported to the environment.

    When a Cupid context is available, the bearer token (and, if present in
    ``op.extra_params``, the endpoint and project) are written into
    ``os.environ`` before ``f(ctx, op)`` is called, and every output whose
    context entry is still ``None`` is set to ``{'status': 'OK'}``.
    Without a Cupid context, ``f(ctx, op)`` is called unchanged.

    The environment is always restored to its prior content afterwards.
    """
    from cupid import context
    from mars.utils import to_str

    old_envs = os.environ.copy()
    try:
        if context() is None:
            logger.debug('Not in ODPS environment.')
            f(ctx, op)
        else:
            env = os.environ

            logger.debug('Get bearer token from cupid.')
            bearer_token = context().get_bearer_token()
            env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)
            if 'endpoint' in op.extra_params:
                env['ODPS_ENDPOINT'] = str(op.extra_params['endpoint'])
            # an already configured project takes precedence over the operand's
            if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME' not in env):
                env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])
            f(ctx, op)
            for out in op.outputs:
                if ctx[out.key] is None:
                    ctx[out.key] = {'status': 'OK'}
    finally:
        # Restore in place rather than rebinding ``os.environ``: assigning a
        # plain dict would leave the variables set above in the real process
        # environment and would detach later writes from it.
        for key in list(os.environ):
            if key not in old_envs:
                del os.environ[key]
        for key, value in old_envs.items():
            if os.environ.get(key) != value:
                os.environ[key] = value
def _handle_terminate_instance(sock):
    """Handle a "terminate instance" request read from ``sock``.

    The request is a 4-byte little-endian length followed by a pickled dict
    containing ``instance_id``. If the Cupid runtime context is ready, the
    instance is stopped through an ODPS client authenticated with the
    current bearer token; otherwise a warning is logged. On any failure the
    error is logged and reported back through ``_write_request_result``.
    """
    from cupid.runtime import context, RuntimeContext
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with key `instance_id`
        # NOTE(review): pickle.loads must only ever see data from a trusted
        # peer; confirm this socket is not reachable by untrusted clients.
        cmd_body = pickle.loads(sock.recv(cmd_len))

        instance_id = cmd_body['instance_id']

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            project = os.environ['ODPS_PROJECT_NAME']
            endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
            o = ODPS(None, None, account=account, project=project, endpoint=endpoint)

            o.stop_instance(instance_id)
        # NOTE(review): unlike the other handlers, no success result is
        # written here - confirm the client does not wait for one.
    except:  # noqa: E722 - same catch-all as the sibling request handlers
        logger.exception('Failed to terminate instance')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def execute(cls, ctx, op):
    """Execute a commit operand; only the terminal operand commits the upload.

    For a terminal operand, an ODPS client is built from the Cupid bearer
    token (project and endpoint taken from the environment when set, else
    from ``op.odps_params``) and the Cupid upload session identified by
    ``op.cupid_handle`` is committed. In every case an empty DataFrame is
    stored as the operand's first output.
    """
    import pandas as pd
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid import CupidSession, context
    from cupid.io.table import CupidTableUploadSession

    if op.is_terminal:
        account = BearerTokenAccount(context().get_bearer_token())

        env_project = os.environ.get('ODPS_PROJECT_NAME')
        params = op.odps_params.copy()
        if env_project:
            # the environment overrides the project carried by the operand
            params['project'] = env_project
        runtime_endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT')
        endpoint = runtime_endpoint or params['endpoint']

        odps_client = ODPS(None, None, account=account,
                           project=params['project'], endpoint=endpoint)
        session = CupidSession(odps_client)

        # table names are expected in the form "<project>.<table>"
        project_name, table_name = op.table_name.split('.')
        uploader = CupidTableUploadSession(
            session=session,
            table_name=table_name,
            project_name=project_name,
            handle=op.cupid_handle,
            blocks=op.blocks,
        )
        uploader.commit(overwrite=op.overwrite)

    ctx[op.outputs[0].key] = pd.DataFrame()
def _main():
    """Start the notebook service and register it with the Cupid context.

    Waits for the Mars endpoint published in the Cupid KV store, dumps it,
    configures and starts the notebook, registers the notebook endpoint as
    a Cupid application and finally runs the bearer-token actor.
    """
    from cupid import context
    from mars.utils import get_next_port

    cupid_context = context()
    mars_endpoint = wait_mars_ready(cupid_context.kv_store(), CUPID_APP_NAME)
    host_addr = socket.gethostbyname(socket.gethostname())

    # The variable may legitimately be absent; popping without a default
    # would abort start-up with a KeyError.
    os.environ.pop('KUBE_API_ADDRESS', None)

    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        notebook_port = DEFAULT_NOTEBOOK_PORT
    else:
        notebook_port = str(get_next_port())
    endpoint = 'http://{0}:{1}'.format(host_addr, notebook_port)

    # dump endpoint to ~/.mars
    dump_endpoint(mars_endpoint)

    # add startup script for notebook
    config_startup()

    # start notebook
    start_notebook(notebook_port)

    # modify in hyper mode
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        endpoint = socket.gethostname() + "-{}".format(notebook_port)
    cupid_context.register_application(NOTEBOOK_NAME, endpoint)

    asyncio.run(create_bearer_token_actor())
def _main():
    """Launch the GraphScope coordinator and its gateway and register them.

    The launch arguments are read from ``sys.argv[1]`` as base64-encoded
    JSON. The gateway endpoint is published in the Cupid KV store under
    ``GS_COORDINATOR_NAME``, the coordinator and gateway are started, and
    the endpoint is registered as a Cupid application.
    """
    argv = sys.argv[1]
    args_dict = json.loads(base64.b64decode(argv).decode())
    print('launch graphscope:', args_dict)

    from cupid import context

    cupid_context = context()
    host_addr = socket.gethostbyname(socket.gethostname())

    # The variable may legitimately be absent; popping without a default
    # would abort start-up with a KeyError.
    os.environ.pop('KUBE_API_ADDRESS', None)

    coordinator_port = args_dict.get('port', None) or DEFAULT_GS_COORDINATOR_PORT
    coordinator_gateway_port = (
        args_dict.get('gateway_port', None) or DEFAULT_GS_COORDINATOR_GATEWAY_PORT
    )
    endpoint = 'http://{0}:{1}'.format(host_addr, coordinator_gateway_port)

    kvstore = cupid_context.kv_store()
    kvstore[GS_COORDINATOR_NAME] = json.dumps(dict(endpoint=endpoint))

    # start coordinator
    vineyard_socket = os.environ.get('VINEYARD_IPC_SOCKET', '/tmp/vineyard.sock')
    start_coordinator(args_dict, coordinator_port, vineyard_socket)
    start_coordinator_gateway(args_dict, coordinator_port, coordinator_gateway_port)

    # modify in hyper mode
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        endpoint = socket.gethostname() + "-{}".format(coordinator_port)
    cupid_context.register_application(GS_COORDINATOR_NAME, endpoint)
def write_cupid_service_info(self, cupid_key):
    """Publish this service's endpoint in the Cupid KV store.

    The value stored under ``cupid_key`` is a JSON object with a single
    ``endpoint`` entry. The Cupid context is kept on ``self.cupid_context``.
    """
    from cupid import context

    self.cupid_context = context()
    store = self.cupid_context.kv_store()
    store[cupid_key] = json.dumps({'endpoint': self.endpoint})
    logger.info('Service endpoint %s written in key %s', self.endpoint, cupid_key)
def get_bearer_token():
    """Return the bearer token of the Cupid context, or ``None`` without a context."""
    from cupid import context

    ctx = context()
    return None if ctx is None else ctx.get_bearer_token()
def tile(cls, op):
    """Tile a table-write operand into per-chunk writers and a commit tree.

    A Cupid upload session is created for ``op.table_name``. Each input
    chunk gets a ``DataFrameWriteTableSplit`` operand with its own block id.
    The resulting chunks are combined, ``combine_size`` at a time, through
    non-terminal ``DataFrameWriteTableCommit`` operands until fewer than
    ``combine_size`` remain. Those remaining chunks feed a single terminal
    commit operand, whose chunk is the only chunk of the returned dataframe.
    """
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid import CupidSession, context

    bearer_token = context().get_bearer_token()
    account = BearerTokenAccount(bearer_token)
    o = ODPS(None, None, account=account, **op.odps_params)
    cupid_session = CupidSession(o)

    data_src = o.get_table(op.table_name)

    logger.debug('Start creating upload session from cupid.')
    upload_session = cupid_session.create_upload_session(data_src)

    input_df = op.inputs[0]

    out_chunks = []
    out_chunk_shape = (0,) * len(input_df.shape)
    blocks = {}
    for chunk in input_df.chunks:
        # timestamp + uuid4 hex makes each block id unique per chunk
        block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace('-', '')
        chunk_op = DataFrameWriteTableSplit(dtypes=op.dtypes, table_name=op.table_name,
                                            partition_spec=op.partition_spec,
                                            cupid_handle=to_str(upload_session.handle),
                                            block_id=block_id,
                                            write_batch_size=op.write_batch_size)
        out_chunk = chunk_op.new_chunk([chunk], shape=out_chunk_shape,
                                       index=chunk.index, dtypes=chunk.dtypes)
        out_chunks.append(out_chunk)
        blocks[block_id] = op.partition_spec

    # build commit tree
    combine_size = 8
    chunks = out_chunks
    # ``>=`` (not ``>``): with exactly ``combine_size`` chunks one more
    # combine round is required, otherwise the assertion below fails.
    while len(chunks) >= combine_size:
        new_chunks = []
        for i in range(0, len(chunks), combine_size):
            chks = chunks[i: i + combine_size]
            if len(chks) == 1:
                chk = chks[0]
            else:
                chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes, is_terminal=False)
                chk = chk_op.new_chunk(chks, shape=out_chunk_shape, dtypes=op.dtypes)
            new_chunks.append(chk)
        chunks = new_chunks

    assert len(chunks) < combine_size

    commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes, table_name=op.table_name,
                                                blocks=blocks,
                                                cupid_handle=to_str(upload_session.handle),
                                                overwrite=op.overwrite,
                                                odps_params=op.odps_params,
                                                is_terminal=True)
    commit_table_chunk = commit_table_op.new_chunk(chunks, shape=out_chunk_shape,
                                                   dtypes=op.dtypes)

    out_df = op.outputs[0]
    new_op = op.copy()
    return new_op.new_dataframes(op.inputs, shape=out_df.shape,
                                 dtypes=out_df.dtypes,
                                 chunks=[commit_table_chunk],
                                 nsplits=((0,),) * len(out_chunk_shape))
def start_channel(self, envs):
    """Apply channel variables to the environment and create the Cupid context.

    ``envs`` is merged into ``os.environ`` first; afterwards the ODPS bearer
    token and endpoint variables are derived from the values now present in
    the environment.
    """
    from cupid import context

    os.environ.update(envs)
    self._cupid_context = context()

    # both source variables are read before either target is written
    os.environ.update(
        ODPS_BEARER_TOKEN=os.environ['BEARER_TOKEN_INITIAL_VALUE'],
        ODPS_ENDPOINT=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
def _prepare_channel(channel_file):
    """Wait for ``channel_file``, load its variables and set up the Cupid channel.

    The file is polled once per second until it exists, then parsed as
    JSON; if the first read fails, it is retried once after one second.
    The loaded variables are merged into ``os.environ``, the Cupid context
    is created, and the ODPS token/endpoint variables are derived.
    """

    def _load_envs():
        with open(channel_file, 'r') as env_file:
            return json.loads(env_file.read())

    while not os.path.exists(channel_file):
        time.sleep(1)

    try:
        envs = _load_envs()
    except:  # noqa: E722 - any failure triggers the single retry
        time.sleep(1)
        envs = _load_envs()

    from cupid import context

    os.environ.update(envs)
    context()

    os.environ.update(
        ODPS_BEARER_TOKEN=os.environ['BEARER_TOKEN_INITIAL_VALUE'],
        ODPS_ENDPOINT=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
    logger.info('Started channel for Cupid Server.')
def _handle_get_bearer_token(sock):
    """Answer a "get bearer token" request with the current Cupid token.

    The length-prefixed, pickled request body is read from ``sock`` but its
    content is not used. The reply is ``{'token': <bearer token>}``; on any
    failure the error is logged and reported to the client.
    """
    try:
        (body_size,) = struct.unpack('<I', sock.recv(4))
        # the request payload is consumed and decoded, then ignored
        pickle.loads(sock.recv(body_size))

        from cupid import context

        token = context().get_bearer_token()
        _write_request_result(sock, result={'token': token})
    except:  # noqa: E722
        logger.exception('Failed to get bearer token')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def _check_bearer_token(self):
    """Refresh the cached bearer token from the Cupid context when needed.

    Does nothing when no Cupid context is available. Before the first
    recorded modification, the token is fetched and stored if it differs
    from the cached one. Afterwards the token is re-fetched once more than
    ``self._expired_time`` has elapsed since ``self._last_modified_time``.
    """
    from cupid import context

    cupid_context = context()
    if cupid_context is None:
        return

    t = datetime.now()
    if self._last_modified_time is None:
        token = cupid_context.get_bearer_token()
        if token != self._token:
            self._token = token
            # NOTE(review): the timestamp is assumed to be recorded only
            # when the token actually changed - confirm the nesting.
            self._last_modified_time = datetime.now()
    elif (t - self._last_modified_time) > self._expired_time:
        # cached token is considered expired: fetch a fresh one
        self._token = cupid_context.get_bearer_token()
        self._last_modified_time = datetime.now()
def start(self):
    """Start the worker service and open a Cupid channel for each helper process.

    After the base service has started, a channel is prepared for every
    process helper actor and its variables are handed to the actor through
    ``start_channel``.
    """
    from mars.actors import new_client
    from cupid import context

    self.cupid_context = context()
    self.read_cupid_service_info(self.args.cupid_scheduler_key)
    self.create_scheduler_discoverer()

    super(CupidWorkerServiceMain, self).start()

    client = new_client()
    for helper_actor in self._service._process_helper_actors:
        channel_envs = self.cupid_context.prepare_channel()
        helper_ref = client.actor_ref(helper_actor)
        helper_ref.start_channel({env.name: env.value for env in channel_envs})
def run(self):
    """Start the pool, publish its endpoint in the Cupid KV store and prepare channels.

    The advertised endpoint combines the host part of
    ``self.advertise_address`` with the port part of ``self.address`` and
    is stored under the key given by the ``MARS_K8S_POD_NAME`` variable.
    """
    # NOTE(review): read as a bare ``return`` (already running) followed by
    # ``super().run()``; otherwise the loop at the end could never iterate.
    if self.processes:
        return
    super().run()

    from cupid import context

    self._cupid_context = context()
    kvstore = self._cupid_context.kv_store()
    # host of the advertise address + port of the bound address
    advertise_endpoint = self.advertise_address.split(':')[0] \
        + ':' + self.address.split(':')[-1]
    kvstore[os.environ['MARS_K8S_POD_NAME']] = json.dumps(dict(endpoint=advertise_endpoint))
    logger.debug('Endpoint %s written to %s', advertise_endpoint, os.environ['MARS_K8S_POD_NAME'])

    # one channel per started process
    for idx in range(len(self.processes)):
        self._prepare_process_channel(idx)
def _handle_commit_table_upload_session(sock):
    """Commit a Cupid table upload session described by the request on ``sock``.

    The pickled request dict carries ``odps_params``, ``table_name``,
    ``cupid_handle``, ``blocks`` and ``overwrite``. Project and endpoint
    come from the environment when set, otherwise from ``odps_params``.
    An empty success result is written after the commit; any failure is
    logged and reported to the client.
    """
    try:
        (body_size,) = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name, cupid_handle, blocks, overwrite
        request = pickle.loads(sock.recv(body_size))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext
        from cupid.io.table import CupidTableUploadSession

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.')
        ctx = context()

        params = request['odps_params']
        account = BearerTokenAccount(ctx.get_bearer_token())
        project = os.environ.get('ODPS_PROJECT_NAME') or params['project']
        endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or params['endpoint']
        odps_client = ODPS(None, None, account=account,
                           project=project, endpoint=endpoint)
        session = CupidSession(odps_client)

        # table names are expected in the form "<project>.<table>"
        project_name, table_name = request['table_name'].split('.')
        uploader = CupidTableUploadSession(
            session=session,
            table_name=table_name,
            project_name=project_name,
            handle=request['cupid_handle'],
            blocks=request['blocks'],
        )
        uploader.commit(overwrite=request['overwrite'])

        _write_request_result(sock)
    except:  # noqa: E722
        logger.exception('Failed to commit upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def _handle_enum_table_partitions(sock):
    """Enumerate the partitions of a table for the request read from ``sock``.

    The pickled request dict carries ``odps_params``, ``table_name`` and an
    optional ``partition``. The reply is:

    * ``None`` when the table has no partition columns;
    * ``[partition]`` when the requested partition exists;
    * the specs returned by ``filter_partitions`` when a partition was
      requested but does not exist as given;
    * all partition specs when no partition was requested.

    Any failure is logged and reported to the client.
    """
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name, partition
        task_config = pickle.loads(sock.recv(cmd_len))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import context

        cupid_ctx = context()

        odps_params = task_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        # environment settings win over the parameters sent by the client
        project = os.environ.get('ODPS_PROJECT_NAME', None) or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None, None, account=account, project=project, endpoint=endpoint)

        table = o.get_table(task_config['table_name'])
        partition_desc = task_config.get('partition')
        if not table.schema.partitions:
            _write_request_result(sock, result=None)
        elif partition_desc:
            if check_partition_exist(table, partition_desc):
                _write_request_result(sock, result=[partition_desc])
            else:
                parts = filter_partitions(o, list(table.partitions), partition_desc)
                _write_request_result(
                    sock, result=[str(pt.partition_spec) for pt in parts])
        else:
            _write_request_result(
                sock, result=[str(pt.partition_spec) for pt in table.partitions])
    except:  # noqa: E722 - same catch-all as the sibling request handlers
        # message corrected: this handler enumerates partitions, it does not
        # create a download session
        logger.exception('Failed to enumerate table partitions')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def _handle_put_kv(sock):
    """Store a key/value pair from the request on ``sock`` in the Cupid KV store.

    The pickled request dict carries ``key`` and ``value``. When the Cupid
    context is not ready only a warning is logged. An empty success result
    is written afterwards; any failure is logged and reported to the client.
    """
    try:
        (body_size,) = struct.unpack('<I', sock.recv(4))
        # dict with key
        request = pickle.loads(sock.recv(body_size))

        from cupid.runtime import RuntimeContext

        if RuntimeContext.is_context_ready():
            from cupid import context

            store = context().kv_store()
            store[request['key']] = request['value']
        else:
            logger.warning('Cupid context not ready')
        _write_request_result(sock)
    except:  # noqa: E722
        logger.exception('Failed to put kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def _handle_create_table_upload_session(sock):
    """Create a Cupid upload session for the table named in the request.

    The pickled request dict carries ``odps_params`` and ``table_name``.
    Project and endpoint come from the environment when set, otherwise from
    ``odps_params``. The reply is ``{'handle': <session handle>}``; any
    failure is logged and reported to the client.
    """
    try:
        (body_size,) = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name
        request = pickle.loads(sock.recv(body_size))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.')
        ctx = context()

        params = request['odps_params']
        account = BearerTokenAccount(ctx.get_bearer_token())
        project = os.environ.get('ODPS_PROJECT_NAME') or params['project']
        endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or params['endpoint']
        odps_client = ODPS(None, None, account=account,
                           project=project, endpoint=endpoint)
        session = CupidSession(odps_client)

        target_table = odps_client.get_table(request['table_name'])

        logger.debug('Start creating upload session from cupid.')
        uploader = session.create_upload_session(target_table)

        _write_request_result(sock, result={'handle': uploader.handle})
    except:  # noqa: E722
        logger.exception('Failed to create upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def execute(cls, ctx, op):
    """Execute a commit operand; only the terminal operand commits the upload.

    For a terminal operand, an ODPS client is built from the Cupid bearer
    token and ``op.odps_params``, and the Cupid upload session identified
    by ``op.cupid_handle`` is committed. In every case an empty DataFrame
    is stored as the operand's first output.
    """
    import pandas as pd
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid import CupidSession, context
    from cupid.io.table import CupidTableUploadSession

    if op.is_terminal:
        account = BearerTokenAccount(context().get_bearer_token())
        odps_client = ODPS(None, None, account=account, **op.odps_params)
        session = CupidSession(odps_client)

        # table names are expected in the form "<project>.<table>"
        project_name, table_name = op.table_name.split('.')
        uploader = CupidTableUploadSession(
            session=session,
            table_name=table_name,
            project_name=project_name,
            handle=op.cupid_handle,
            blocks=op.blocks,
        )
        uploader.commit(overwrite=op.overwrite)

    ctx[op.outputs[0].key] = pd.DataFrame()
def _main():
    """Start the notebook service on ``NOTEBOOK_PORT`` and register it with Cupid.

    Waits for the Mars endpoint published in the Cupid KV store, dumps it,
    configures and starts the notebook, then registers the notebook
    endpoint as a Cupid application.
    """
    from cupid import context

    ctx = context()
    mars_address = wait_mars_ready(ctx.kv_store(), CUPID_APP_NAME)
    host_ip = socket.gethostbyname(socket.gethostname())
    notebook_address = 'http://{0}:{1}'.format(host_ip, NOTEBOOK_PORT)

    # dump endpoint to ~/.mars
    dump_endpoint(mars_address)
    # add startup script for notebook
    config_startup()
    # start notebook
    start_notebook(NOTEBOOK_PORT)

    # in hyper mode the application is registered as "<hostname>-<port>"
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        notebook_address = socket.gethostname() + "-{}".format(NOTEBOOK_PORT)
    ctx.register_application(NOTEBOOK_NAME, notebook_address)
def post_process_start_child(self, idx):
    """Set up the Cupid channel in the child process with index ``idx``.

    The channel file for ``idx`` is polled once per second until it exists,
    then parsed as JSON; if the first read fails, it is retried once after
    one second. The loaded variables are merged into ``os.environ``, the
    Cupid context is created, and the ODPS token/endpoint variables are
    derived.
    """
    channel_path = self._channel_file[idx]

    def _load_envs():
        with open(self._channel_file[idx], 'r') as env_file:
            return json.loads(env_file.read())

    while not os.path.exists(channel_path):
        time.sleep(1)

    try:
        envs = _load_envs()
    except:  # noqa: E722 - any failure triggers the single retry
        time.sleep(1)
        envs = _load_envs()

    from cupid import context

    os.environ.update(envs)
    context()

    os.environ.update(
        ODPS_BEARER_TOKEN=os.environ['BEARER_TOKEN_INITIAL_VALUE'],
        ODPS_ENDPOINT=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
    logger.info('Started channel for process index %s.', idx)
def start_cupid_service(self):
    """Spawn the Cupid service process and hand it the channel variables.

    A temporary directory holds the channel file and the service socket;
    the socket path is also exported as ``CUPID_SERVICE_SOCKET``. After the
    service process is started, the prepared channel variables are written
    to the channel file as JSON and the call blocks until the socket file
    exists.
    """
    self._env_path = tempfile.mkdtemp(prefix='mars-pool-')
    pid = os.getpid()
    self._channel_file = os.path.join(
        self._env_path, 'mars-cupid-channel-%s.json' % pid)

    sock_path = os.path.join(self._env_path, 'mars-cupid-sock-%s.sock' % pid)
    self._cupid_sock_file = sock_path
    os.environ['CUPID_SERVICE_SOCKET'] = sock_path

    self._cupid_service_proc = multiprocessing.Process(
        target=run_cupid_service, args=(self._channel_file,))
    self._cupid_service_proc.start()

    from cupid import context

    self._cupid_context = context()
    channel_envs = self._cupid_context.prepare_channel()
    payload = {env.name: env.value for env in channel_envs}
    with open(self._channel_file, 'w') as env_file:
        env_file.write(json.dumps(payload))

    # wait for the service process to create its socket file
    while not os.path.exists(self._cupid_sock_file):
        time.sleep(0.1)
def start(self):
    """Start the worker service, open a Cupid channel per helper, enable status upload.

    After the base service has started, a channel is prepared for every
    process helper actor and its variables are handed to the actor through
    ``start_channel``. Once all channels are started, status upload is
    enabled on the service's status actor.
    """
    from mars.actors import new_client
    from cupid import context

    self.cupid_context = context()
    self.read_cupid_service_info(self.args.cupid_scheduler_key)
    self.create_scheduler_discoverer()

    super(CupidWorkerServiceMain, self).start()

    client = new_client()
    for helper_actor in self._service._process_helper_actors:
        logger.info('Start channel for subprocess %s.', helper_actor.uid)
        channel_envs = self.cupid_context.prepare_channel()
        helper_ref = client.actor_ref(helper_actor)
        helper_ref.start_channel({env.name: env.value for env in channel_envs})

    logger.info('All channel ready, upload worker status now.')
    self._service._status_ref.enable_status_upload(channel_ready=True, _tell=True)
def _handle_get_kv(sock):
    """Look up a key from the request on ``sock`` in the Cupid KV store.

    The pickled request dict carries ``key``. The reply is
    ``{'value': <value>}``, with ``None`` as value when the Cupid context
    is not ready. Any failure is logged and reported to the client.
    """
    try:
        (body_size,) = struct.unpack('<I', sock.recv(4))
        # dict with key
        request = pickle.loads(sock.recv(body_size))

        from cupid.runtime import RuntimeContext

        if RuntimeContext.is_context_ready():
            from cupid import context

            store = context().kv_store()
            value = store.get(request['key'])
        else:
            logger.warning('Cupid context not ready')
            value = None

        _write_request_result(sock, result={'value': value})
    except:  # noqa: E722
        logger.exception('Failed to get kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
def post_process_start_child(self, idx):
    """Set up stdout and the Cupid channel in the child process with index ``idx``.

    TensorFlow is imported first when available, stdout is switched to
    unbuffered mode, and then the channel file for ``idx`` is awaited and
    loaded (with one retry). Its variables are merged into ``os.environ``,
    the Cupid context is created, and the ODPS token/endpoint variables
    are derived.
    """
    try:
        # Import tensorflow before the cupid channel starts: tensorflow
        # needs protobuf 3+ while the cupid channel relies on protobuf 2.4,
        # and once the channel is up tensorflow would pick up the old
        # protobuf even with LD_LIBRARY_PATH set. Importing it in advance
        # prevents a potential crash.
        import tensorflow  # noqa: F401
    except ImportError:
        pass

    # set STDOUT to unbuffer mode
    raw_stdout = open(sys.stdout.fileno(), 'wb', 0)
    sys.stdout = io.TextIOWrapper(raw_stdout, write_through=True)

    channel_path = self._channel_file[idx]

    def _load_envs():
        with open(self._channel_file[idx], 'r') as env_file:
            return json.loads(env_file.read())

    while not os.path.exists(channel_path):
        time.sleep(1)

    try:
        envs = _load_envs()
    except:  # noqa: E722 - any failure triggers the single retry
        time.sleep(1)
        envs = _load_envs()

    from cupid import context

    os.environ.update(envs)
    context()

    os.environ.update(
        ODPS_BEARER_TOKEN=os.environ['BEARER_TOKEN_INITIAL_VALUE'],
        ODPS_ENDPOINT=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
    logger.info('Started channel for process index %s.', idx)
def __init__(self, *args, **kwargs):
    """Initialise the base class, then cache the Cupid context on the instance."""
    super().__init__(*args, **kwargs)

    from cupid import context as _cupid_context_factory

    self._cupid_context = _cupid_context_factory()
def __init__(self):
    """Initialise the base service, then cache the Cupid context on the instance."""
    super(CupidWebServiceMain, self).__init__()

    from cupid import context as _cupid_context_factory

    self.cupid_context = _cupid_context_factory()
def get_bearer_token(self):
    """Return the bearer token of the current Cupid context."""
    from cupid import context

    return context().get_bearer_token()
def _tile_cupid(cls, op):
    """Tile a table-write operand into per-chunk writers and a commit tree.

    A Cupid upload session is created for ``op.table_name``. Each chunk of
    the row-concatenated input gets a ``DataFrameWriteTableSplit`` operand
    with its own block id. The resulting chunks are combined,
    ``combine_size`` at a time, through non-terminal
    ``DataFrameWriteTableCommit`` operands until fewer than
    ``combine_size`` remain. Those remaining chunks feed a single terminal
    commit operand, whose chunk is the only chunk of the returned dataframe.

    Raises:
        SystemError: if the Cupid runtime context is not ready.
    """
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid import CupidSession, context
    from cupid.runtime import RuntimeContext

    if not RuntimeContext.is_context_ready():
        raise SystemError(
            'No Mars cluster found, please create via `o.create_mars_cluster`.'
        )
    cupid_ctx = context()

    bearer_token = cupid_ctx.get_bearer_token()
    account = BearerTokenAccount(bearer_token)
    project = os.environ.get('ODPS_PROJECT_NAME', None)
    # work on a copy so the operand's own parameters stay untouched
    odps_params = op.odps_params.copy()
    if project:
        # the environment overrides the project carried by the operand
        odps_params['project'] = project
    endpoint = os.environ.get(
        'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
    o = ODPS(None,
             None,
             account=account,
             project=odps_params['project'],
             endpoint=endpoint)
    cupid_session = CupidSession(o)

    data_src = o.get_table(op.table_name)

    logger.debug('Start creating upload session from cupid.')
    upload_session = cupid_session.create_upload_session(data_src)

    input_df = build_concatenated_rows_frame(op.inputs[0])
    out_df = op.outputs[0]

    out_chunks = []
    # every produced chunk is declared with a zero-sized shape
    out_chunk_shape = (0, ) * len(input_df.shape)
    # block id -> partition spec, passed to the terminal commit operand
    blocks = {}
    for chunk in input_df.chunks:
        # timestamp + uuid4 hex makes each block id unique per chunk
        block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace(
            '-', '')
        chunk_op = DataFrameWriteTableSplit(
            dtypes=op.dtypes,
            table_name=op.table_name,
            unknown_as_string=op.unknown_as_string,
            partition_spec=op.partition_spec,
            cupid_handle=to_str(upload_session.handle),
            block_id=block_id,
            write_batch_size=op.write_batch_size)
        out_chunk = chunk_op.new_chunk([chunk],
                                       shape=out_chunk_shape,
                                       index=chunk.index,
                                       index_value=out_df.index_value,
                                       dtypes=chunk.dtypes)
        out_chunks.append(out_chunk)
        blocks[block_id] = op.partition_spec

    # build commit tree
    combine_size = 8
    chunks = out_chunks
    # reduce level by level until fewer than combine_size chunks remain
    while len(chunks) >= combine_size:
        new_chunks = []
        for i in range(0, len(chunks), combine_size):
            chks = chunks[i:i + combine_size]
            if len(chks) == 1:
                # a lone trailing chunk is carried up unchanged
                chk = chks[0]
            else:
                chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes,
                                                   is_terminal=False)
                chk = chk_op.new_chunk(chks,
                                       shape=out_chunk_shape,
                                       index_value=out_df.index_value,
                                       dtypes=op.dtypes)
            new_chunks.append(chk)
        chunks = new_chunks

    assert len(chunks) < combine_size

    # the terminal operand receives the operand's original odps_params,
    # not the locally adjusted copy
    commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes,
                                                table_name=op.table_name,
                                                blocks=blocks,
                                                cupid_handle=to_str(
                                                    upload_session.handle),
                                                overwrite=op.overwrite,
                                                odps_params=op.odps_params,
                                                is_terminal=True)
    commit_table_chunk = commit_table_op.new_chunk(
        chunks,
        shape=out_chunk_shape,
        dtypes=op.dtypes,
        index_value=out_df.index_value)

    new_op = op.copy()
    return new_op.new_dataframes(op.inputs,
                                 shape=out_df.shape,
                                 index_value=out_df.index_value,
                                 dtypes=out_df.dtypes,
                                 columns_value=out_df.columns_value,
                                 chunks=[commit_table_chunk],
                                 nsplits=((0, ), ) * len(out_chunk_shape))
def cupid_kv(self):
    """Return the Cupid KV store, creating and caching it on first access."""
    try:
        return self._cupid_kv
    except AttributeError:
        from cupid import context

        self._cupid_kv = context().kv_store()
        return self._cupid_kv