def copy(src_client, dest_client, key, part_size, total_size):
    """Copy a single object of `total_size` bytes from `src_client` to
    `dest_client` as a multipart upload, fetching/uploading `part_size`-byte
    ranges with 16 concurrent consumer workers.

    :param src_client: OSS client for the source bucket.
    :param dest_client: OSS client for the destination bucket.
    :param key: object key (same key is used on both sides).
    :param part_size: size of each part in bytes.
    :param total_size: total object size in bytes.
    """
    logger = logging.getLogger()
    logger.info("Starting to copy %s", key)
    start_time = time.time()
    upload_id = dest_client.init_multipart_upload(key).upload_id

    def producer(queue):
        # Split [0, total_size) into ceil(total_size / part_size) inclusive
        # byte ranges; OSS part numbers start at 1.
        num_of_parts = (total_size + part_size - 1) // part_size
        for part_no in range(1, num_of_parts + 1):
            part_range = ((part_no - 1) * part_size,
                          min(part_no * part_size, total_size) - 1)
            queue.put((part_no, part_range))

    # Collected as (part_no, PartInfo) so results can be ordered afterwards;
    # list.append is atomic under the GIL, so concurrent appends are safe.
    parts = []

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            part_no, part_range = item
            object_stream = src_client.get_object(key, byte_range=part_range)
            res = dest_client.upload_part(key, upload_id, part_no, object_stream)
            parts.append((part_no, PartInfo(part_no, res.etag)))

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    # BUG FIX: with 16 concurrent consumers the parts finish out of order, but
    # CompleteMultipartUpload requires the part list in ascending part-number
    # order. Sort before completing. (Ties are impossible, so PartInfo itself
    # is never compared.)
    parts.sort(key=lambda pair: pair[0])
    dest_client.complete_multipart_upload(key, upload_id,
                                          [info for _, info in parts])
    end_time = time.time()
    logger.info('Copied %s in %s secs', key, end_time - start_time)
def copy(src_client, dest_client, keys):
    """Copy every object in `keys` from `src_client` to `dest_client` as a
    whole-object put, using 16 concurrent consumer workers.

    :param src_client: OSS client for the source bucket.
    :param dest_client: OSS client for the destination bucket.
    :param keys: iterable of object keys to copy (same key on both sides).
    """
    logger = logging.getLogger()
    logger.info("Starting to copy %d objects", len(keys))
    start_time = time.time()

    def producer(queue):
        for key in keys:
            queue.put(key)

    def consumer(queue):
        while queue.ok():
            key = queue.get()
            if key is None:
                break
            # Whole-object copy: stream straight from source into destination.
            dest_client.put_object(key, src_client.get_object(key))

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    end_time = time.time()
    logger.info('Copied %d objects in %s secs', len(keys), end_time - start_time)
def copy(gen_parts, src_client, dest_client, key, part_size, total_size,
         upload_id, group_id, num_of_parts_per_group, total_num_of_parts):
    """Upload this group's share of the parts of `key` for an already-started
    multipart upload, with 16 concurrent consumer workers.

    `gen_parts` is an injected callable that enqueues (part_no, byte_range)
    work items for the given group. Returns a list of
    ``{"part_no": ..., "etag": ...}`` dicts for the uploaded parts (in
    completion order, not part order); the caller aggregates groups and
    completes the upload.
    """
    logger = logging.getLogger()
    logger.info("Starting to copy %s, group_id %d", key, group_id)
    start_time = time.time()

    def producer(queue):
        # Part enumeration is fully delegated to the injected generator.
        gen_parts(queue, part_size, total_size, group_id,
                  num_of_parts_per_group, total_num_of_parts)

    parts = []

    def consumer(queue):
        while queue.ok():
            work = queue.get()
            if work is None:
                break
            part_no, part_range = work
            logger.info("%d -> %s", part_no, part_range)
            stream = src_client.get_object(key, byte_range=part_range)
            result = dest_client.upload_part(key, upload_id, part_no, stream)
            parts.append({"part_no": part_no, "etag": result.etag})

    TaskQueue(producer, [consumer] * 16).run()
    end_time = time.time()
    logger.info('Copied %s in %s secs', key, end_time - start_time)
    return parts
def zip_files(oss_client, source_dir, source_files, dest_file):
    """Stream OSS objects into a zip archive stored at `dest_file`, without
    buffering the whole archive: a producer writes zip data into a MemBuffer
    that emits (part_no, data) chunks, and 16 consumers upload those chunks
    as multipart-upload parts.

    Either `source_files` (an explicit list of keys) or `source_dir` (a key
    prefix to enumerate) must be given; `source_files` wins when both are set.
    Entry names are the keys with the `source_dir` prefix stripped.
    Returns `dest_file`. Raises if neither source argument is usable.
    """
    LOG.info('create zip, source_dir: %s, source_files: %s, dest_file: %s',
             source_dir, source_files, dest_file)
    start_time = time.time()
    upload_id = oss_client.init_multipart_upload(dest_file).upload_id

    def zip_add_file(zip_file, key):
        LOG.info('add zip file: %s', key)
        # Skip directory placeholder objects. BUG FIX: endswith also handles
        # an empty key, which the original `key[-1]` indexing crashed on.
        if key.endswith('/'):
            return
        obj = oss_client.get_object(key)
        # ZIP_STORED (no compression) lets entries stream straight through.
        zip_file.write_file(key[len(source_dir):], obj,
                            compress_type=zipfile.ZIP_STORED)

    def producer(queue):
        mem_buf = MemBuffer(queue)
        zip_file = StreamZipFile(mem_buf, 'w')
        if isinstance(source_files, list):
            for obj in source_files:
                zip_add_file(zip_file, obj)
        elif isinstance(source_dir, basestring):  # NOTE: py2 `basestring`
            for obj in oss2.ObjectIterator(oss_client, prefix=source_dir):
                zip_add_file(zip_file, obj.key)
        else:
            raise Exception(
                'either `source_files` or `source_dir` must be specified')
        zip_file.close()
        mem_buf.flush_buffer()

    # Collected as (part_no, PartInfo) so results can be ordered afterwards.
    parts = []

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            part_no, part_data = item
            res = oss_client.upload_part(dest_file, upload_id, part_no, part_data)
            parts.append((part_no, PartInfo(part_no, res.etag)))

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    # BUG FIX: concurrent consumers finish out of order, but
    # CompleteMultipartUpload requires parts in ascending part-number order.
    parts.sort(key=lambda pair: pair[0])
    oss_client.complete_multipart_upload(dest_file, upload_id,
                                         [info for _, info in parts])
    end_time = time.time()
    LOG.info('create zip, cost: %s secs', end_time - start_time)
    return dest_file
def handler(event, context):
    """Function Compute entry point: verify the CRC64 checksums of `count`
    objects named ``<prefix>/<i>`` in the destination bucket, using 16
    concurrent workers.

    If the event carries a `crcs` list, each object's
    ``x-oss-hash-crc64ecma`` header is checked against ``crcs[i]``; otherwise
    it is compared with the same header on the source object. Source and
    destination clients are cached in the module-level `clients` dict keyed
    by bucket name.

    Returns ``{'failed_crcs': [...indices...], 'success': bool}``.
    """
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region

    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client

    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        # Endpoint precedence: event override, then env var, then the
        # region-internal source endpoint.
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or
            os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    crcs = evt.get("crcs")
    failed_crcs = []
    count = evt.get("count", 10)

    def producer(queue):
        for i in range(count):
            queue.put(i)

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            i = item
            key = '%s/%d' % (evt["prefix"], i)
            result = dest_client.head_object(key)
            crc = result.headers["x-oss-hash-crc64ecma"]
            if crcs is not None:
                # Expected CRC supplied in the event.
                if crc != str(crcs[i]):
                    logger.info("expected %s, actual %s", crcs[i], crc)
                    failed_crcs.append(i)
            else:
                # No expectation given: compare against the source object.
                result = src_client.head_object(key)
                src_crc = result.headers["x-oss-hash-crc64ecma"]
                if crc != str(src_crc):
                    logger.info("expected %s, actual %s", src_crc, crc)
                    failed_crcs.append(i)

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    return {'failed_crcs': failed_crcs, 'success': len(failed_crcs) == 0}
def handler(event, context):
    """Function Compute entry point: create `count` objects of random
    printable content under ``<prefix>/<i>`` in the event's bucket, using 16
    concurrent workers.

    Each object's size is ``gauss(64, 64)`` clamped to [1, 128] times
    `base_size` bytes (defaults: count=10, base_size=1024).

    Returns ``{"crcs": [...]}`` with the server-reported CRC64 of each object,
    indexed by object number.
    """
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    dest_client = get_oss_client(context, src_endpoint, evt["src_bucket"])
    start_time = time.time()
    count = evt.get("count", 10)
    base_size = evt.get("base_size", 1024)

    def producer(queue):
        for i in range(count):
            queue.put(i)

    # Pre-sized result slots; each worker writes only its own index, and
    # list item assignment is atomic under the GIL.
    crcs = [None] * count

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            i = item
            # Size multiplier ~ N(64, 64), clamped to (0, 128].
            size = random.gauss(64, 64)
            if size <= 0:
                size = 1
            elif size >= 128:
                size = 128
            # BUG FIX: the comprehension variable was also named `i`; under
            # Python 2 the list-comprehension variable leaks into the
            # enclosing scope and clobbered the object index, corrupting both
            # the key and the crcs slot. Use `_` instead.
            chars = ''.join([random.choice(string.printable)
                             for _ in range(int(size * base_size))])
            key = '%s/%d' % (evt["prefix"], i)
            result = dest_client.put_object(key, chars)
            crcs[i] = result.crc

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    end_time = time.time()
    logger.info('Saved %d objects in %s secs', len(crcs), end_time - start_time)
    return {"crcs": crcs}