def wrapper(ctx, op):
    """Call the wrapped ``f(ctx, op)``, exporting ODPS settings when cupid is available.

    ``f``, ``os`` and ``logger`` are resolved from the enclosing scope.

    When ``CUPID_SERVICE_SOCKET`` is absent from the environment, ``f`` is
    called directly and nothing else happens.  Otherwise a bearer token is
    fetched from ``CupidServiceClient`` and exported as ``ODPS_BEARER_TOKEN``;
    ``ODPS_ENDPOINT`` / ``ODPS_PROJECT_NAME`` are filled from
    ``op.extra_params`` when present; ``f`` is called; and every output of
    ``op`` whose ``ctx`` entry is still ``None`` is set to
    ``{'status': 'OK'}``.  The environment is restored afterwards, even if
    ``f`` raises.

    :param ctx: mapping indexed by ``out.key`` for each output of ``op``
    :param op: operand exposing ``extra_params`` and ``outputs``
    """
    from .cupid_service import CupidServiceClient
    from mars.utils import to_str

    if 'CUPID_SERVICE_SOCKET' not in os.environ:
        f(ctx, op)
        return

    saved_env = os.environ.copy()
    try:
        env = os.environ

        logger.debug('Get bearer token from cupid.')
        bearer_token = CupidServiceClient().get_bearer_token()
        env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)

        if 'endpoint' in op.extra_params:
            # A runtime endpoint from the environment wins over the operand's.
            env['ODPS_ENDPOINT'] = (
                os.environ.get('ODPS_RUNTIME_ENDPOINT')
                or str(op.extra_params['endpoint'])
            )
        if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME' not in env):
            env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])

        f(ctx, op)

        for out in op.outputs:
            if ctx[out.key] is None:
                ctx[out.key] = {'status': 'OK'}
    finally:
        # Restore the mapping in place.  Rebinding ``os.environ`` to the plain
        # dict snapshot would leave the variables set above in the process
        # environment and would stop later writes from reaching ``putenv``.
        for key in list(os.environ):
            if key not in saved_env:
                del os.environ[key]
        for key, value in saved_env.items():
            if os.environ.get(key) != value:
                os.environ[key] = value
def wrapper(ctx, op):
    """Call the wrapped ``f(ctx, op)``, exporting ODPS settings when a cupid context exists.

    ``f``, ``os`` and ``logger`` are resolved from the enclosing scope.

    When ``cupid.context()`` returns ``None``, ``f`` is called directly.
    Otherwise the context's bearer token is exported as ``ODPS_BEARER_TOKEN``;
    ``ODPS_ENDPOINT`` / ``ODPS_PROJECT_NAME`` are filled from
    ``op.extra_params`` when present; ``f`` is called; and every output of
    ``op`` whose ``ctx`` entry is still ``None`` is set to
    ``{'status': 'OK'}``.  In both cases the environment is restored
    afterwards, even if ``f`` raises.

    :param ctx: mapping indexed by ``out.key`` for each output of ``op``
    :param op: operand exposing ``extra_params`` and ``outputs``
    """
    from cupid import context
    from mars.utils import to_str

    saved_env = os.environ.copy()
    try:
        if context() is None:
            logger.debug('Not in ODPS environment.')
            f(ctx, op)
        else:
            env = os.environ

            logger.debug('Get bearer token from cupid.')
            bearer_token = context().get_bearer_token()
            env['ODPS_BEARER_TOKEN'] = to_str(bearer_token)

            if 'endpoint' in op.extra_params:
                env['ODPS_ENDPOINT'] = str(op.extra_params['endpoint'])
            if ('project' in op.extra_params) and ('ODPS_PROJECT_NAME' not in env):
                env['ODPS_PROJECT_NAME'] = str(op.extra_params['project'])

            f(ctx, op)

            for out in op.outputs:
                if ctx[out.key] is None:
                    ctx[out.key] = {'status': 'OK'}
    finally:
        # Restore the mapping in place.  Rebinding ``os.environ`` to the plain
        # dict snapshot would leave the variables set above in the process
        # environment and would stop later writes from reaching ``putenv``.
        for key in list(os.environ):
            if key not in saved_env:
                del os.environ[key]
        for key, value in saved_env.items():
            if os.environ.get(key) != value:
                os.environ[key] = value
def _write_table_in_cupid(odps, df, table, partition=None, overwrite=True, unknown_as_string=None):
    """Write ``df`` into ``table`` as a single cupid block (block id ``'0'``).

    The frame is serialized in 1024-row slices into one in-memory Arrow
    stream, which is handed to the cupid block writer in a single ``write``
    call.  The block and then the upload session are committed afterwards.

    :param odps: object passed to ``CupidSession``
    :param df: frame to write; sliced with ``df[a:b]`` and passed to
        ``pa.RecordBatch.from_pandas``
    :param table: target table; ``name``, ``project.name`` and ``schema``
        are read from it
    :param partition: partition spec passed to the block writer and recorded
        for block ``'0'`` on the upload session
    :param overwrite: forwarded to ``upload_session.commit``
    :param unknown_as_string: accepted for interface compatibility; not used
        by this function
    """
    import pyarrow as pa
    from mars.utils import to_str
    from cupid import CupidSession
    from cupid.io.table.core import BlockWriter

    cupid_session = CupidSession(odps)
    logger.debug('Start creating upload session from cupid.')
    upload_session = cupid_session.create_upload_session(table)
    block_writer = BlockWriter(
        _table_name=table.name,
        _project_name=table.project.name,
        _table_schema=table.schema,
        _partition_spec=partition,
        _block_id='0',
        _handle=to_str(upload_session.handle),
    )

    logger.debug('Start writing table block, block id: 0')
    with block_writer.open_arrow_writer() as cupid_writer:
        sink = pa.BufferOutputStream()
        batch_size = 1024

        # The stream schema is taken from the first row of the frame.
        schema = pa.RecordBatch.from_pandas(df[:1], preserve_index=False).schema
        arrow_writer = pa.RecordBatchStreamWriter(sink, schema)

        for start in range(0, len(df), batch_size):
            # Convert every slice, not only the first one, so that all
            # batches in the stream are prepared the same way.
            batch_data = convert_pandas_object_to_string(df[start:start + batch_size])
            batch = pa.RecordBatch.from_pandas(batch_data, preserve_index=False)
            arrow_writer.write_batch(batch)

        arrow_writer.close()
        cupid_writer.write(sink.getvalue())

    block_writer.commit()

    # The session is told about the single block written above before commit.
    upload_session._blocks = {'0': partition}
    upload_session.commit(overwrite=overwrite)
def testStringConversion(self):
    """Exercise ``utils.to_binary``, ``utils.to_str`` and ``utils.to_text``.

    Covers ``None`` pass-through, plain/bytes/unicode literals, a subclass of
    each string type, and rejection of a non-string object.  ``unicode`` is
    expected to be available in the enclosing module.
    """
    # (converter, expected result type, expected result value)
    conversions = (
        (utils.to_binary, bytes, b'abcdefg'),
        (utils.to_str, str, 'abcdefg'),
        (utils.to_text, unicode, u'abcdefg'),
    )

    def check(convert, value, expected_type, expected_value):
        self.assertIsInstance(convert(value), expected_type)
        self.assertEqual(convert(value), expected_value)

    # None is returned unchanged by every converter.
    for convert, _, _ in conversions:
        self.assertIsNone(convert(None))

    # (input literal, subclass name, subclass base, conversion checked on the subclass)
    cases = (
        ('abcdefg', 'ustr', str, conversions[1]),
        (b'abcdefg', 'ubytes', bytes, conversions[0]),
        (u'abcdefg', 'uunicode', unicode, conversions[2]),
    )
    for value, subclass_name, base, subclass_conversion in cases:
        for convert, expected_type, expected_value in conversions:
            check(convert, value, expected_type, expected_value)

        # An instance of a subclass of the string type converts as well.
        derived = type(subclass_name, (base, ), {})
        convert, expected_type, expected_value = subclass_conversion
        check(convert, derived(value), expected_type, expected_value)

    # A module object is not a string-like value.
    for convert, _, _ in conversions:
        with self.assertRaises(TypeError):
            convert(utils)
def test_string_conversion():
    """Exercise ``utils.to_binary``, ``utils.to_str`` and ``utils.to_text``.

    Covers ``None`` pass-through, str/bytes literals, a subclass of each
    string type, and rejection of a non-string object.
    """
    # (converter, expected result type, expected result value)
    conversions = (
        (utils.to_binary, bytes, b'abcdefg'),
        (utils.to_str, str, 'abcdefg'),
        (utils.to_text, str, u'abcdefg'),
    )

    def check(convert, value, expected_type, expected_value):
        assert isinstance(convert(value), expected_type)
        assert convert(value) == expected_value

    # None is returned unchanged by every converter.
    for convert, _, _ in conversions:
        assert convert(None) is None

    # (input literal, subclass name, subclass base, conversion checked on the subclass)
    cases = (
        ('abcdefg', 'ustr', str, conversions[1]),
        (b'abcdefg', 'ubytes', bytes, conversions[0]),
        (u'abcdefg', 'uunicode', str, conversions[2]),
    )
    for value, subclass_name, base, subclass_conversion in cases:
        for convert, expected_type, expected_value in conversions:
            check(convert, value, expected_type, expected_value)

        # An instance of a subclass of the string type converts as well.
        derived = type(subclass_name, (base,), {})
        convert, expected_type, expected_value = subclass_conversion
        check(convert, derived(value), expected_type, expected_value)

    # A module object is not a string-like value.
    for convert, _, _ in conversions:
        with pytest.raises(TypeError):
            convert(utils)