Code example #1
    def wrapper(ctx, op):
        from cupid import context
        from mars.utils import to_str
        old_envs = os.environ.copy()
        try:
            if context() is None:
                logger.debug('Not in ODPS environment.')
                f(ctx, op)
            else:
                env = os.environ

                logger.debug('Get bearer token from cupid.')
                bearer_token = context().get_bearer_token()
                env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)
                if 'endpoint' in op.extra_params:
                    env['ODPS_ENDPOINT'] = str(op.extra_params['endpoint'])
                if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME'
                                                       not in env):
                    env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])
                f(ctx, op)
                for out in op.outputs:
                    if ctx[out.key] is None:
                        ctx[out.key] = {'status': 'OK'}
        finally:
            # restore the environment in place; rebinding os.environ would
            # not propagate removals back to the real process environment
            os.environ.clear()
            os.environ.update(old_envs)
Code example #2
def _handle_terminate_instance(sock):
    from cupid.runtime import context, RuntimeContext
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with instance_id
        cmd_body = pickle.loads(sock.recv(cmd_len))

        instance_id = cmd_body['instance_id']

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            project = os.environ['ODPS_PROJECT_NAME']
            endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
            o = ODPS(None,
                     None,
                     account=account,
                     project=project,
                     endpoint=endpoint)

            o.stop_instance(instance_id)
    except:
        logger.exception('Failed to terminate instance')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #3
    def execute(cls, ctx, op):
        import pandas as pd
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.io.table import CupidTableUploadSession

        if op.is_terminal:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            project = os.environ.get('ODPS_PROJECT_NAME', None)
            odps_params = op.odps_params.copy()
            if project:
                odps_params['project'] = project
            endpoint = os.environ.get(
                'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
            o = ODPS(None,
                     None,
                     account=account,
                     project=odps_params['project'],
                     endpoint=endpoint)
            cupid_session = CupidSession(o)

            project_name, table_name = op.table_name.split('.')
            upload_session = CupidTableUploadSession(session=cupid_session,
                                                     table_name=table_name,
                                                     project_name=project_name,
                                                     handle=op.cupid_handle,
                                                     blocks=op.blocks)
            upload_session.commit(overwrite=op.overwrite)

        ctx[op.outputs[0].key] = pd.DataFrame()
Code example #4
def _main():
    from cupid import context
    from mars.utils import get_next_port

    cupid_context = context()
    mars_endpoint = wait_mars_ready(cupid_context.kv_store(), CUPID_APP_NAME)
    host_addr = socket.gethostbyname(socket.gethostname())

    os.environ.pop('KUBE_API_ADDRESS', None)

    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        notebook_port = DEFAULT_NOTEBOOK_PORT
    else:
        notebook_port = str(get_next_port())

    endpoint = 'http://{0}:{1}'.format(host_addr, notebook_port)

    # dump endpoint to ~/.mars
    dump_endpoint(mars_endpoint)

    # add startup script for notebook
    config_startup()

    # start notebook
    start_notebook(notebook_port)

    # modify in hyper mode
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        endpoint = socket.gethostname() + "-{}".format(notebook_port)
    cupid_context.register_application(NOTEBOOK_NAME, endpoint)

    asyncio.run(create_bearer_token_actor())
Code example #5
def _main():
    argv = sys.argv[1]
    args_dict = json.loads(base64.b64decode(argv).decode())
    print('launch graphscope:', args_dict)

    from cupid import context

    cupid_context = context()
    host_addr = socket.gethostbyname(socket.gethostname())

    os.environ.pop('KUBE_API_ADDRESS', None)

    coordinator_port = args_dict.get('port', None) or DEFAULT_GS_COORDINATOR_PORT
    coordinator_gateway_port = args_dict.get('gateway_port', None) or DEFAULT_GS_COORDINATOR_GATEWAY_PORT

    endpoint = 'http://{0}:{1}'.format(host_addr, coordinator_gateway_port)
    kvstore = cupid_context.kv_store()
    kvstore[GS_COORDINATOR_NAME] = json.dumps(dict(endpoint=endpoint))

    # start coordinator
    vineyard_socket = os.environ.get('VINEYARD_IPC_SOCKET', '/tmp/vineyard.sock')
    start_coordinator(args_dict, coordinator_port, vineyard_socket)
    start_coordinator_gateway(args_dict, coordinator_port, coordinator_gateway_port)

    # modify in hyper mode
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        endpoint = socket.gethostname() + "-{}".format(coordinator_port)
    cupid_context.register_application(GS_COORDINATOR_NAME, endpoint)
Code example #6
    def write_cupid_service_info(self, cupid_key):
        from cupid import context
        self.cupid_context = context()

        kvstore = self.cupid_context.kv_store()
        kvstore[cupid_key] = json.dumps(dict(endpoint=self.endpoint))
        logger.info('Service endpoint %s written in key %s', self.endpoint,
                    cupid_key)
Code example #7
    def get_bearer_token():
        from cupid import context

        cupid_context = context()
        if cupid_context is None:
            return

        return cupid_context.get_bearer_token()
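
The helper above returns None when no Cupid context is available. Most examples on this page then repeat the same boilerplate around the token: fetch it from the context, wrap it in a BearerTokenAccount, and build an ODPS entry object, letting the ODPS_PROJECT_NAME and ODPS_RUNTIME_ENDPOINT environment variables override the values in odps_params (see examples #15, #16 and #18 below). A minimal sketch of that shared pattern; the helper name odps_from_cupid_context is ours, not from the source:

import os


def odps_from_cupid_context(odps_params):
    # hypothetical helper distilled from the examples on this page
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid import context

    cupid_ctx = context()
    if cupid_ctx is None:
        return None  # not running inside a Cupid/ODPS cluster

    account = BearerTokenAccount(cupid_ctx.get_bearer_token())
    # environment variables injected by the cluster take precedence
    project = os.environ.get('ODPS_PROJECT_NAME') or odps_params['project']
    endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
    return ODPS(None, None, account=account, project=project, endpoint=endpoint)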
Code example #8
    def tile(cls, op):
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context

        bearer_token = context().get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        o = ODPS(None, None, account=account, **op.odps_params)
        cupid_session = CupidSession(o)

        data_src = o.get_table(op.table_name)

        logger.debug('Start creating upload session from cupid.')
        upload_session = cupid_session.create_upload_session(data_src)

        input_df = op.inputs[0]

        out_chunks = []
        out_chunk_shape = (0,) * len(input_df.shape)
        blocks = {}
        for chunk in input_df.chunks:
            block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace('-', '')
            chunk_op = DataFrameWriteTableSplit(dtypes=op.dtypes, table_name=op.table_name,
                                                partition_spec=op.partition_spec,
                                                cupid_handle=to_str(upload_session.handle),
                                                block_id=block_id, write_batch_size=op.write_batch_size)
            out_chunk = chunk_op.new_chunk([chunk], shape=out_chunk_shape, index=chunk.index, dtypes=chunk.dtypes)
            out_chunks.append(out_chunk)
            blocks[block_id] = op.partition_spec

        # build commit tree
        combine_size = 8
        chunks = out_chunks
        # keep combining while a full group can still be formed, so that
        # fewer than combine_size chunks remain when the loop exits
        while len(chunks) >= combine_size:
            new_chunks = []
            for i in range(0, len(chunks), combine_size):
                chks = chunks[i: i + combine_size]
                if len(chks) == 1:
                    chk = chks[0]
                else:
                    chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes, is_terminal=False)
                    chk = chk_op.new_chunk(chks, shape=out_chunk_shape, dtypes=op.dtypes)
                new_chunks.append(chk)
            chunks = new_chunks

        assert len(chunks) < combine_size

        commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes, table_name=op.table_name, blocks=blocks,
                                                    cupid_handle=to_str(upload_session.handle),
                                                    overwrite=op.overwrite, odps_params=op.odps_params,
                                                    is_terminal=True)
        commit_table_chunk = commit_table_op.new_chunk(chunks, shape=out_chunk_shape, dtypes=op.dtypes)

        out_df = op.outputs[0]
        new_op = op.copy()
        return new_op.new_dataframes(op.inputs, shape=out_df.shape,
                                     dtypes=out_df.dtypes, chunks=[commit_table_chunk],
                                     nsplits=((0,),) * len(out_chunk_shape))
Code example #9
    def start_channel(self, envs):
        from cupid import context

        os.environ.update(envs)
        self._cupid_context = context()
        odps_envs = {
            'ODPS_BEARER_TOKEN': os.environ['BEARER_TOKEN_INITIAL_VALUE'],
            'ODPS_ENDPOINT': os.environ['ODPS_RUNTIME_ENDPOINT'],
        }
        os.environ.update(odps_envs)
Code example #10
def _prepare_channel(channel_file):
    while not os.path.exists(channel_file):
        time.sleep(1)
    try:
        with open(channel_file, 'r') as env_file:
            envs = json.loads(env_file.read())
    except:
        # the writer may still be flushing the file; wait and retry once
        time.sleep(1)
        with open(channel_file, 'r') as env_file:
            envs = json.loads(env_file.read())

    from cupid import context

    os.environ.update(envs)
    context()
    odps_envs = {
        'ODPS_BEARER_TOKEN': os.environ['BEARER_TOKEN_INITIAL_VALUE'],
        'ODPS_ENDPOINT': os.environ['ODPS_RUNTIME_ENDPOINT'],
    }
    os.environ.update(odps_envs)
    logger.info('Started channel for Cupid Server.')
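
The same wait-and-retry read recurs in examples #21 and #25 below: the channel file is written out by start_cupid_service (example #22), so a reader may see it before the JSON has been fully flushed. A sketch of a shared helper capturing the originals' one-retry behavior; the name _load_channel_envs is ours:

import json
import os
import time


def _load_channel_envs(channel_file, poll_interval=1):
    # block until the writer has created the file
    while not os.path.exists(channel_file):
        time.sleep(poll_interval)
    try:
        with open(channel_file, 'r') as env_file:
            return json.loads(env_file.read())
    except (OSError, ValueError):
        # the file may be only partially written; wait and retry once
        time.sleep(poll_interval)
        with open(channel_file, 'r') as env_file:
            return json.loads(env_file.read())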
Code example #11
def _handle_get_bearer_token(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # the request body carries no arguments for this command; consume it
        pickle.loads(sock.recv(cmd_len))

        from cupid import context

        bearer_token = context().get_bearer_token()
        _write_request_result(sock, result={'token': bearer_token})
    except:
        logger.exception('Failed to get bearer token')
        _write_request_result(sock, False, exc_info=sys.exc_info())
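
Every _handle_* function on this page reads the same frame: a little-endian unsigned 32-bit length prefix followed by a pickled dict (command dispatch happens before these handlers receive the socket and is not shown in the excerpts). A client-side sketch of that framing, assuming an already-connected socket; the function name send_request_body is ours:

import pickle
import struct


def send_request_body(sock, cmd_body):
    # frame the body as the handlers expect: a 4-byte little-endian
    # length prefix, then the pickled payload
    payload = pickle.dumps(cmd_body)
    sock.sendall(struct.pack('<I', len(payload)))
    sock.sendall(payload)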
Code example #12
    def _check_bearer_token(self):
        from cupid import context

        cupid_context = context()
        if cupid_context is None:
            return

        t = datetime.now()
        if self._last_modified_time is None:
            token = cupid_context.get_bearer_token()
            if token != self._token:
                self._token = token
                self._last_modified_time = datetime.now()
        elif (t - self._last_modified_time) > self._expired_time:
            self._token = cupid_context.get_bearer_token()
            self._last_modified_time = datetime.now()
Code example #13
    def start(self):
        from mars.actors import new_client
        from cupid import context

        self.cupid_context = context()
        self.read_cupid_service_info(self.args.cupid_scheduler_key)
        self.create_scheduler_discoverer()

        super(CupidWorkerServiceMain, self).start()

        actor_client = new_client()
        proc_helpers = self._service._process_helper_actors
        for proc_helper_actor in proc_helpers:
            envs = self.cupid_context.prepare_channel()
            proc_helper_ref = actor_client.actor_ref(proc_helper_actor)
            new_envs = dict((env.name, env.value) for env in envs)
            proc_helper_ref.start_channel(new_envs)
Code example #14
    def run(self):
        if self.processes:
            return

        super().run()

        from cupid import context
        self._cupid_context = context()

        kvstore = self._cupid_context.kv_store()
        advertise_endpoint = self.advertise_address.split(':')[0] \
            + ':' + self.address.split(':')[-1]
        kvstore[os.environ['MARS_K8S_POD_NAME']] = json.dumps(dict(endpoint=advertise_endpoint))
        logger.debug('Endpoint %s written to %s', advertise_endpoint, os.environ['MARS_K8S_POD_NAME'])

        for idx in range(len(self.processes)):
            self._prepare_process_channel(idx)
Code example #15
def _handle_commit_table_upload_session(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name, cupid_handle, blocks, overwrite
        commit_config = pickle.loads(sock.recv(cmd_len))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext
        from cupid.io.table import CupidTableUploadSession

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_ctx = context()

        odps_params = commit_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        project = os.environ.get('ODPS_PROJECT_NAME',
                                 None) or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=project,
                 endpoint=endpoint)
        cupid_session = CupidSession(o)

        project_name, table_name = commit_config['table_name'].split('.')
        upload_session = CupidTableUploadSession(
            session=cupid_session,
            table_name=table_name,
            project_name=project_name,
            handle=commit_config['cupid_handle'],
            blocks=commit_config['blocks'])
        upload_session.commit(overwrite=commit_config['overwrite'])

        _write_request_result(sock)
    except:
        logger.exception('Failed to commit upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #16
def _handle_enum_table_partitions(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name, partition
        task_config = pickle.loads(sock.recv(cmd_len))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import context

        cupid_ctx = context()

        odps_params = task_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        project = os.environ.get('ODPS_PROJECT_NAME',
                                 None) or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=project,
                 endpoint=endpoint)

        table = o.get_table(task_config['table_name'])
        partition_desc = task_config.get('partition')
        if not table.schema.partitions:
            _write_request_result(sock, result=None)
        elif partition_desc:
            if check_partition_exist(table, partition_desc):
                _write_request_result(sock, result=[partition_desc])
            else:
                parts = filter_partitions(o, list(table.partitions),
                                          partition_desc)
                _write_request_result(
                    sock, result=[str(pt.partition_spec) for pt in parts])
        else:
            _write_request_result(
                sock,
                result=[str(pt.partition_spec) for pt in table.partitions])
    except:
        logger.exception('Failed to enumerate table partitions')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #17
def _handle_put_kv(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with key and value
        cmd_body = pickle.loads(sock.recv(cmd_len))

        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            from cupid import context
            cupid_kv = context().kv_store()
            cupid_kv[cmd_body['key']] = cmd_body['value']

        _write_request_result(sock)
    except:
        logger.exception('Failed to put kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #18
def _handle_create_table_upload_session(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name
        session_config = pickle.loads(sock.recv(cmd_len))

        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_ctx = context()

        odps_params = session_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        project = os.environ.get('ODPS_PROJECT_NAME',
                                 None) or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=project,
                 endpoint=endpoint)
        cupid_session = CupidSession(o)

        data_src = o.get_table(session_config['table_name'])

        logger.debug('Start creating upload session from cupid.')
        upload_session = cupid_session.create_upload_session(data_src)

        ret_data = {
            'handle': upload_session.handle,
        }
        _write_request_result(sock, result=ret_data)
    except:
        logger.exception('Failed to create upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #19
    def execute(cls, ctx, op):
        import pandas as pd
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.io.table import CupidTableUploadSession

        if op.is_terminal:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            o = ODPS(None, None, account=account, **op.odps_params)
            cupid_session = CupidSession(o)

            project_name, table_name = op.table_name.split('.')
            upload_session = CupidTableUploadSession(
                session=cupid_session, table_name=table_name, project_name=project_name,
                handle=op.cupid_handle, blocks=op.blocks)
            upload_session.commit(overwrite=op.overwrite)

        ctx[op.outputs[0].key] = pd.DataFrame()
Code example #20
def _main():
    from cupid import context

    cupid_context = context()
    mars_endpoint = wait_mars_ready(cupid_context.kv_store(), CUPID_APP_NAME)
    host_addr = socket.gethostbyname(socket.gethostname())
    endpoint = 'http://{0}:{1}'.format(host_addr, NOTEBOOK_PORT)

    # dump endpoint to ~/.mars
    dump_endpoint(mars_endpoint)

    # add startup script for notebook
    config_startup()

    # start notebook
    start_notebook(NOTEBOOK_PORT)

    # modify in hyper mode
    if os.environ.get('VM_ENGINE_TYPE') == 'hyper':
        endpoint = socket.gethostname() + "-{}".format(NOTEBOOK_PORT)
    cupid_context.register_application(NOTEBOOK_NAME, endpoint)
Code example #21
    def post_process_start_child(self, idx):
        while not os.path.exists(self._channel_file[idx]):
            time.sleep(1)
        try:
            with open(self._channel_file[idx], 'r') as env_file:
                envs = json.loads(env_file.read())
        except:
            time.sleep(1)
            with open(self._channel_file[idx], 'r') as env_file:
                envs = json.loads(env_file.read())

        from cupid import context

        os.environ.update(envs)
        proc_cupid_context = context()
        odps_envs = {
            'ODPS_BEARER_TOKEN': os.environ['BEARER_TOKEN_INITIAL_VALUE'],
            'ODPS_ENDPOINT': os.environ['ODPS_RUNTIME_ENDPOINT'],
        }
        os.environ.update(odps_envs)
        logger.info('Started channel for process index %s.', idx)
Code example #22
    def start_cupid_service(self):
        self._env_path = tempfile.mkdtemp(prefix='mars-pool-')
        self._channel_file = os.path.join(
            self._env_path, 'mars-cupid-channel-%s.json' % os.getpid())
        self._cupid_sock_file = os.environ[
            'CUPID_SERVICE_SOCKET'] = os.path.join(
                self._env_path, 'mars-cupid-sock-%s.sock' % os.getpid())

        self._cupid_service_proc = multiprocessing.Process(
            target=run_cupid_service, args=(self._channel_file, ))
        self._cupid_service_proc.start()

        from cupid import context
        self._cupid_context = context()

        envs = self._cupid_context.prepare_channel()
        envs_dict = dict((env.name, env.value) for env in envs)
        with open(self._channel_file, 'w') as env_file:
            env_file.write(json.dumps(envs_dict))

        while not os.path.exists(self._cupid_sock_file):
            time.sleep(0.1)
Code example #23
    def start(self):
        from mars.actors import new_client
        from cupid import context

        self.cupid_context = context()
        self.read_cupid_service_info(self.args.cupid_scheduler_key)
        self.create_scheduler_discoverer()

        super(CupidWorkerServiceMain, self).start()

        actor_client = new_client()
        proc_helpers = self._service._process_helper_actors
        for proc_helper_actor in proc_helpers:
            logger.info('Start channel for subprocess %s.',
                        proc_helper_actor.uid)
            envs = self.cupid_context.prepare_channel()
            proc_helper_ref = actor_client.actor_ref(proc_helper_actor)
            new_envs = dict((env.name, env.value) for env in envs)
            proc_helper_ref.start_channel(new_envs)
        logger.info('All channel ready, upload worker status now.')
        self._service._status_ref.enable_status_upload(channel_ready=True,
                                                       _tell=True)
Code example #24
def _handle_get_kv(sock):
    try:
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with key
        cmd_body = pickle.loads(sock.recv(cmd_len))

        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
            value = None
        else:
            from cupid import context
            cupid_kv = context().kv_store()
            value = cupid_kv.get(cmd_body['key'])

        ret_data = {
            'value': value,
        }
        _write_request_result(sock, result=ret_data)
    except:
        logger.exception('Failed to get kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Code example #25
    def post_process_start_child(self, idx):
        try:
            # Patch import here.
            # The reason is that tensorflow relies on protobuf 3+,
            # meanwhile, cupid channel relies on protobuf 2.4,
            # however, when cupid channel started below,
            # tensorflow will recognize the old version of protobuf
            # even when we set LD_LIBRARY_PATH,
            # so we import tensorflow in advance to prevent a potential crash.
            import tensorflow
        except ImportError:
            pass

        # set STDOUT to unbuffered mode
        sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)

        while not os.path.exists(self._channel_file[idx]):
            time.sleep(1)
        try:
            with open(self._channel_file[idx], 'r') as env_file:
                envs = json.loads(env_file.read())
        except:
            time.sleep(1)
            with open(self._channel_file[idx], 'r') as env_file:
                envs = json.loads(env_file.read())

        from cupid import context

        os.environ.update(envs)
        proc_cupid_context = context()
        odps_envs = {
            'ODPS_BEARER_TOKEN': os.environ['BEARER_TOKEN_INITIAL_VALUE'],
            'ODPS_ENDPOINT': os.environ['ODPS_RUNTIME_ENDPOINT'],
        }
        os.environ.update(odps_envs)
        logger.info('Started channel for process index %s.', idx)
Code example #26
File: web.py Project: sinjor/aliyun-odps-python-sdk
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        from cupid import context
        self._cupid_context = context()
Code example #27
    def __init__(self):
        super(CupidWebServiceMain, self).__init__()

        from cupid import context
        self.cupid_context = context()
Code example #28
        def get_bearer_token(self):
            from cupid import context

            ctx = context()
            return ctx.get_bearer_token()
Code example #29
    def _tile_cupid(cls, op):
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_ctx = context()

        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        project = os.environ.get('ODPS_PROJECT_NAME', None)
        odps_params = op.odps_params.copy()
        if project:
            odps_params['project'] = project
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=odps_params['project'],
                 endpoint=endpoint)
        cupid_session = CupidSession(o)

        data_src = o.get_table(op.table_name)

        logger.debug('Start creating upload session from cupid.')
        upload_session = cupid_session.create_upload_session(data_src)

        input_df = build_concatenated_rows_frame(op.inputs[0])
        out_df = op.outputs[0]

        out_chunks = []
        out_chunk_shape = (0, ) * len(input_df.shape)
        blocks = {}
        for chunk in input_df.chunks:
            block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace(
                '-', '')
            chunk_op = DataFrameWriteTableSplit(
                dtypes=op.dtypes,
                table_name=op.table_name,
                unknown_as_string=op.unknown_as_string,
                partition_spec=op.partition_spec,
                cupid_handle=to_str(upload_session.handle),
                block_id=block_id,
                write_batch_size=op.write_batch_size)
            out_chunk = chunk_op.new_chunk([chunk],
                                           shape=out_chunk_shape,
                                           index=chunk.index,
                                           index_value=out_df.index_value,
                                           dtypes=chunk.dtypes)
            out_chunks.append(out_chunk)
            blocks[block_id] = op.partition_spec

        # build commit tree
        combine_size = 8
        chunks = out_chunks
        while len(chunks) >= combine_size:
            new_chunks = []
            for i in range(0, len(chunks), combine_size):
                chks = chunks[i:i + combine_size]
                if len(chks) == 1:
                    chk = chks[0]
                else:
                    chk_op = DataFrameWriteTableCommit(dtypes=op.dtypes,
                                                       is_terminal=False)
                    chk = chk_op.new_chunk(chks,
                                           shape=out_chunk_shape,
                                           index_value=out_df.index_value,
                                           dtypes=op.dtypes)
                new_chunks.append(chk)
            chunks = new_chunks

        assert len(chunks) < combine_size

        commit_table_op = DataFrameWriteTableCommit(dtypes=op.dtypes,
                                                    table_name=op.table_name,
                                                    blocks=blocks,
                                                    cupid_handle=to_str(
                                                        upload_session.handle),
                                                    overwrite=op.overwrite,
                                                    odps_params=op.odps_params,
                                                    is_terminal=True)
        commit_table_chunk = commit_table_op.new_chunk(
            chunks,
            shape=out_chunk_shape,
            dtypes=op.dtypes,
            index_value=out_df.index_value)

        new_op = op.copy()
        return new_op.new_dataframes(op.inputs,
                                     shape=out_df.shape,
                                     index_value=out_df.index_value,
                                     dtypes=out_df.dtypes,
                                     columns_value=out_df.columns_value,
                                     chunks=[commit_table_chunk],
                                     nsplits=((0, ), ) * len(out_chunk_shape))
Code example #30
    def cupid_kv(self):
        if not hasattr(self, '_cupid_kv'):
            from cupid import context
            self._cupid_kv = context().kv_store()
        return self._cupid_kv
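
Across these examples the store returned by kv_store() is used like a dict: written by subscript assignment (examples #5, #6, #14 and #17) and read back with get() (example #24). A minimal round trip, assuming a ready Cupid context; the key and endpoint are placeholders:

import json
from cupid import context

cupid_kv = context().kv_store()
# hypothetical key and endpoint, for illustration only
cupid_kv['my-service'] = json.dumps(dict(endpoint='http://host:1234'))
service_info = json.loads(cupid_kv.get('my-service'))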