Ejemplo n.º 1
0
    def wrapper(ctx, op):
        """Run ``f(ctx, op)``, exporting ODPS credentials when cupid is present.

        If ``CUPID_SERVICE_SOCKET`` is absent from ``os.environ`` the wrapped
        callable is invoked directly.  Otherwise a bearer token is requested
        from ``CupidServiceClient`` and exported as ``ODPS_BEARER_TOKEN``;
        ``ODPS_ENDPOINT`` and ``ODPS_PROJECT_NAME`` are filled from
        ``op.extra_params`` when available.  After ``f`` returns, every output
        whose ``ctx`` entry is still ``None`` is set to ``{'status': 'OK'}``.
        The environment is rolled back to its previous content afterwards,
        whether or not ``f`` raised.

        Args:
            ctx: mapping indexed by ``out.key`` for each entry of
                ``op.outputs``.
            op: operand exposing ``extra_params`` and ``outputs``.
        """
        from .cupid_service import CupidServiceClient
        from mars.utils import to_str
        if 'CUPID_SERVICE_SOCKET' not in os.environ:
            f(ctx, op)
            return

        # Snapshot taken before any ODPS_* variable is injected.
        saved_env = os.environ.copy()
        try:
            env = os.environ

            logger.debug('Get bearer token from cupid.')
            bearer_token = CupidServiceClient().get_bearer_token()
            env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)
            if 'endpoint' in op.extra_params:
                # A runtime endpoint already present in the environment
                # takes precedence over the one carried by the operand.
                env['ODPS_ENDPOINT'] = os.environ.get(
                    'ODPS_RUNTIME_ENDPOINT') or str(
                        op.extra_params['endpoint'])
            if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME'
                                                   not in env):
                env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])
            f(ctx, op)
            for out in op.outputs:
                if ctx[out.key] is None:
                    ctx[out.key] = {'status': 'OK'}
        finally:
            # Restore in place.  Rebinding ``os.environ`` to the plain dict
            # returned by ``copy()`` would leave the injected variables in
            # the real process environment and would stop later
            # ``os.environ`` writes from reaching it.
            for key in list(os.environ):
                if key not in saved_env:
                    del os.environ[key]
            os.environ.update(saved_env)
Ejemplo n.º 2
0
    def wrapper(ctx, op):
        """Run ``f(ctx, op)``, exporting ODPS credentials inside cupid.

        When ``cupid.context()`` returns ``None`` the wrapped callable is
        invoked directly.  Otherwise a bearer token is taken from the cupid
        context and exported as ``ODPS_BEARER_TOKEN``; ``ODPS_ENDPOINT`` and
        ``ODPS_PROJECT_NAME`` are filled from ``op.extra_params`` when
        available.  After ``f`` returns, every output whose ``ctx`` entry is
        still ``None`` is set to ``{'status': 'OK'}``.  In both branches the
        environment is rolled back to its previous content afterwards,
        whether or not ``f`` raised.

        Args:
            ctx: mapping indexed by ``out.key`` for each entry of
                ``op.outputs``.
            op: operand exposing ``extra_params`` and ``outputs``.
        """
        from cupid import context
        from mars.utils import to_str
        # Snapshot taken before anything can touch the environment.
        saved_env = os.environ.copy()
        try:
            if context() is None:
                logger.debug('Not in ODPS environment.')
                f(ctx, op)
            else:
                env = os.environ

                logger.debug('Get bearer token from cupid.')
                bearer_token = context().get_bearer_token()
                env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)
                if 'endpoint' in op.extra_params:
                    env['ODPS_ENDPOINT'] = str(op.extra_params['endpoint'])
                if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME'
                                                       not in env):
                    env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])
                f(ctx, op)
                for out in op.outputs:
                    if ctx[out.key] is None:
                        ctx[out.key] = {'status': 'OK'}
        finally:
            # Restore in place.  Rebinding ``os.environ`` to the plain dict
            # returned by ``copy()`` would leave the injected variables in
            # the real process environment and would stop later
            # ``os.environ`` writes from reaching it.
            for key in list(os.environ):
                if key not in saved_env:
                    del os.environ[key]
            os.environ.update(saved_env)
Ejemplo n.º 3
0
def _write_table_in_cupid(odps,
                          df,
                          table,
                          partition=None,
                          overwrite=True,
                          unknown_as_string=None):
    """Upload ``df`` into ``table`` through cupid as a single block ``'0'``.

    The frame is cut into 1024-row slices.  Every slice is passed through
    ``convert_pandas_object_to_string`` and appended to one in-memory Arrow
    record-batch stream; the finished stream is handed to the cupid block
    writer, the block is committed, and then the upload session is committed.

    Args:
        odps: object passed to ``CupidSession``.
        df: frame to upload; it is row-sliced and fed to
            ``pa.RecordBatch.from_pandas`` (presumably a pandas DataFrame).
        table: target table; its ``name``, ``project.name`` and ``schema``
            are read.
        partition: partition spec forwarded to ``BlockWriter`` and recorded
            for block ``'0'`` on the upload session.
        overwrite: forwarded to ``upload_session.commit``.
        unknown_as_string: accepted but not used by this function.
    """
    import pyarrow as pa
    from mars.utils import to_str
    from cupid import CupidSession
    from cupid.io.table.core import BlockWriter

    cupid_session = CupidSession(odps)
    logger.debug('Start creating upload session from cupid.')
    upload_session = cupid_session.create_upload_session(table)
    block_writer = BlockWriter(_table_name=table.name,
                               _project_name=table.project.name,
                               _table_schema=table.schema,
                               _partition_spec=partition,
                               _block_id='0',
                               _handle=to_str(upload_session.handle))
    logger.debug('Start writing table block, block id: 0')

    batch_size = 1024

    def _converted_slice(index):
        # Rows [index * batch_size, (index + 1) * batch_size) of ``df`` with
        # the same object-to-string conversion applied to every slice.
        # Previously only the first slice was converted.
        rows = df[batch_size * index:batch_size * (index + 1)]
        return convert_pandas_object_to_string(rows)

    with block_writer.open_arrow_writer() as cupid_writer:
        sink = pa.BufferOutputStream()

        batch_idx = 0
        # Original order kept: the first slice is converted before the
        # stream schema is derived from the first row of ``df``.
        batch_data = _converted_slice(batch_idx)
        schema = pa.RecordBatch.from_pandas(df[:1],
                                            preserve_index=False).schema
        arrow_writer = pa.RecordBatchStreamWriter(sink, schema)
        while len(batch_data) > 0:
            batch = pa.RecordBatch.from_pandas(batch_data,
                                               preserve_index=False)
            arrow_writer.write_batch(batch)
            batch_idx += 1
            batch_data = _converted_slice(batch_idx)
        arrow_writer.close()
        cupid_writer.write(sink.getvalue())
    block_writer.commit()

    # Record the single written block on the session before committing it.
    upload_session._blocks = {'0': partition}
    upload_session.commit(overwrite=overwrite)
Ejemplo n.º 4
0
    def testStringConversion(self):
        """Exercise ``to_binary``, ``to_str`` and ``to_text`` from ``utils``.

        Covers ``None``, native/bytes/unicode literals, subclasses of each
        string type, and a non-string argument that must raise ``TypeError``.
        """
        converters = (utils.to_binary, utils.to_str, utils.to_text)

        # None comes back as None from every converter.
        for convert in converters:
            self.assertIsNone(convert(None))

        def check_all_conversions(value):
            # Each converter must return its target type with equal content.
            expectations = (
                (utils.to_binary, bytes, b'abcdefg'),
                (utils.to_str, str, 'abcdefg'),
                (utils.to_text, unicode, u'abcdefg'),
            )
            for convert, target_type, target_value in expectations:
                self.assertIsInstance(convert(value), target_type)
                self.assertEqual(convert(value), target_value)

        # Native string literal and a subclass of ``str``.
        native = 'abcdefg'
        check_all_conversions(native)
        str_subclass = type('ustr', (str, ), {})
        self.assertIsInstance(utils.to_str(str_subclass(native)), str)
        self.assertEqual(utils.to_str(str_subclass(native)), 'abcdefg')

        # Bytes literal and a subclass of ``bytes``.
        raw = b'abcdefg'
        check_all_conversions(raw)
        bytes_subclass = type('ubytes', (bytes, ), {})
        self.assertIsInstance(utils.to_binary(bytes_subclass(raw)), bytes)
        self.assertEqual(utils.to_binary(bytes_subclass(raw)), b'abcdefg')

        # Unicode literal and a subclass of ``unicode``.
        text = u'abcdefg'
        check_all_conversions(text)
        unicode_subclass = type('uunicode', (unicode, ), {})
        self.assertIsInstance(utils.to_text(unicode_subclass(text)), unicode)
        self.assertEqual(utils.to_text(unicode_subclass(text)), u'abcdefg')

        # A module object is not string-like, so each converter must reject it.
        for convert in converters:
            with self.assertRaises(TypeError):
                convert(utils)
Ejemplo n.º 5
0
def test_string_conversion():
    """Exercise ``to_binary``, ``to_str`` and ``to_text`` from ``utils``.

    Covers ``None``, str/bytes literals, subclasses of each string type, and
    a non-string argument that must raise ``TypeError``.
    """
    converters = (utils.to_binary, utils.to_str, utils.to_text)

    # None comes back as None from every converter.
    for convert in converters:
        assert convert(None) is None

    def check_all_conversions(value):
        # Each converter must return its target type with equal content.
        expectations = (
            (utils.to_binary, bytes, b'abcdefg'),
            (utils.to_str, str, 'abcdefg'),
            (utils.to_text, str, u'abcdefg'),
        )
        for convert, target_type, target_value in expectations:
            assert isinstance(convert(value), target_type)
            assert convert(value) == target_value

    # Plain string literal and a subclass of ``str``.
    native = 'abcdefg'
    check_all_conversions(native)
    str_subclass = type('ustr', (str,), {})
    assert isinstance(utils.to_str(str_subclass(native)), str)
    assert utils.to_str(str_subclass(native)) == 'abcdefg'

    # Bytes literal and a subclass of ``bytes``.
    raw = b'abcdefg'
    check_all_conversions(raw)
    bytes_subclass = type('ubytes', (bytes,), {})
    assert isinstance(utils.to_binary(bytes_subclass(raw)), bytes)
    assert utils.to_binary(bytes_subclass(raw)) == b'abcdefg'

    # ``u``-prefixed literal and a second ``str`` subclass, via ``to_text``.
    text = u'abcdefg'
    check_all_conversions(text)
    text_subclass = type('uunicode', (str,), {})
    assert isinstance(utils.to_text(text_subclass(text)), str)
    assert utils.to_text(text_subclass(text)) == u'abcdefg'

    # A module object is not string-like, so each converter must reject it.
    for convert in converters:
        with pytest.raises(TypeError):
            convert(utils)