import json
import logging
import os
import re
import time

from oss2.models import PartInfo

# Each handler below is the entry point of a separate Function Compute
# function in the copy workflow; they share this boilerplate. The clients
# dict caches one OSS client per bucket across warm invocations.
clients = {}


# Initialize a multipart upload on the destination and plan the part groups.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    # Group the parts by size; each group is handled by one function execution.
    total_num_of_parts, num_of_groups, num_of_parts_per_group = calc_groups(
        evt["total_size"], evt["part_size"], evt["medium_file_limit"])
    upload_id = dest_client.init_multipart_upload(evt["key"]).upload_id
    return {
        "upload_id": upload_id,
        "total_num_of_parts": total_num_of_parts,
        "groups": list(range(num_of_groups)),
        "num_of_parts_per_group": num_of_parts_per_group
    }
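# calc_groups and get_oss_client are project helpers that are not shown in
# this section. Below are minimal sketches of the contracts the handlers rely
# on; the names and signatures come from the call sites above, the bodies are
# assumptions rather than the project's actual implementations.
def calc_groups(total_size, part_size, medium_file_limit):
    # Parts needed to cover the whole object (ceiling division).
    total_num_of_parts = (total_size + part_size - 1) // part_size
    # Parts per group, sized so one group stays within medium_file_limit bytes.
    num_of_parts_per_group = max(1, medium_file_limit // part_size)
    # Groups needed to cover all parts (ceiling division again).
    num_of_groups = (total_num_of_parts + num_of_parts_per_group - 1) // num_of_parts_per_group
    return total_num_of_parts, num_of_groups, num_of_parts_per_group


# Sketch of get_oss_client, assuming the oss2 SDK and the Python runtime's
# snake_case credential fields; dest_access_role handling (STS assume-role
# for cross-account access) is omitted here.
import oss2

def get_oss_client(context, endpoint, bucket, access_role=None):
    creds = context.credentials
    # StsAuth uses the temporary token injected into the function context.
    auth = oss2.StsAuth(creds.access_key_id, creds.access_key_secret,
                        creds.security_token)
    return oss2.Bucket(auth, endpoint, bucket)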
# Copy a single part: download one byte range and upload it as that part.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    # Download one byte range of the source object and upload it as one part.
    # OSS byte ranges are inclusive on both ends.
    part_no = evt["part_no"]
    part_size = evt["part_size"]
    byte_range = ((part_no - 1) * part_size,
                  min(part_no * part_size, evt["total_size"]) - 1)
    # Alternative: spool the range to local disk instead of streaming:
    # src_client.get_object_to_file(evt["key"], "/tmp/testpart%d" % part_no, byte_range=byte_range)
    object_stream = src_client.get_object(evt["key"], byte_range=byte_range)
    res = dest_client.upload_part(evt["key"], evt["upload_id"], part_no, object_stream)
    return {"part_no": part_no, "etag": res.etag}
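# A quick check of the byte_range arithmetic above (inclusive ranges, as the
# oss2 SDK expects). With an 8 MiB part size and a 20 MiB object, the third
# and final part covers exactly the last 4 MiB; the sizes here are made up
# for illustration.
MiB = 1024 * 1024
part_size, total_size, part_no = 8 * MiB, 20 * MiB, 3
byte_range = ((part_no - 1) * part_size,
              min(part_no * part_size, total_size) - 1)
assert byte_range == (16 * MiB, 20 * MiB - 1)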
# Verify copied objects by comparing CRC64 checksums.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    crcs = evt.get("crcs")
    failed_crcs = []
    start_time = time.time()
    count = evt.get("count", 10)
    base_size = evt.get("base_size", 1024)

    def producer(queue):
        for i in range(count):
            queue.put(i)

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            i = item
            key = '%s/%d' % (evt["prefix"], i)
            result = dest_client.head_object(key)
            crc = result.headers["x-oss-hash-crc64ecma"]
            if crcs is not None:
                # Verify against the expected CRCs passed in the event.
                if crc != str(crcs[i]):
                    logger.info("expected %s, actual %s", crcs[i], crc)
                    failed_crcs.append(i)
            else:
                # No expected CRCs given: compare destination against source.
                result = src_client.head_object(key)
                src_crc = result.headers["x-oss-hash-crc64ecma"]
                if crc != str(src_crc):
                    logger.info("expected %s, actual %s", src_crc, crc)
                    failed_crcs.append(i)

    # Verify up to 16 objects concurrently.
    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()
    return {'failed_crcs': failed_crcs, 'success': len(failed_crcs) == 0}
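# TaskQueue, used by the verifier above, is a project helper that is not
# shown in this section. A minimal thread-based sketch of the contract the
# handler relies on (producer fills the queue, consumers drain it, one None
# sentinel per consumer marks the end); the class name and methods come from
# the call sites above, the body is an assumption:
import queue as _queue
import threading


class TaskQueue(object):
    def __init__(self, producer, consumers):
        self._producer = producer
        self._consumers = consumers
        self._queue = _queue.Queue()
        self._running = True

    def ok(self):
        # Consumers poll this to support early abort; this sketch never aborts.
        return self._running

    def put(self, item):
        self._queue.put(item)

    def get(self):
        return self._queue.get()

    def run(self):
        threads = [threading.Thread(target=c, args=(self,))
                   for c in self._consumers]
        for t in threads:
            t.start()
        self._producer(self)
        for _ in self._consumers:
            self._queue.put(None)  # One sentinel per consumer.
        for t in threads:
            t.join()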
# Copy one object in a single get/put.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    copy(src_client, dest_client, evt["key"])
    return {}
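# copy() is another helper not shown in this section. A plausible minimal
# version for this simple-copy handler, assuming a straight streamed get/put
# between the two clients (an assumption, not the project's exact code). Note
# that the multipart handler below calls a copy() with a different signature,
# so each function presumably ships its own variant.
def copy(src_client, dest_client, key):
    # oss2's get_object returns a file-like stream that put_object accepts,
    # so the object never needs to be buffered fully in memory.
    object_stream = src_client.get_object(key)
    dest_client.put_object(key, object_stream)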
# Copy one group of parts of a large object.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    parts = copy(gen_parts, src_client, dest_client, evt["key"],
                 evt["part_size"], evt["total_size"], evt["upload_id"],
                 evt["group_id"], evt["num_of_parts_per_group"],
                 evt["total_num_of_parts"])
    return {"parts": parts}
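# gen_parts is not shown in this section either. A sketch of what it could
# yield, assuming it enumerates the (part_no, byte_range) pairs belonging to
# one zero-based group; the name comes from the call site above, the
# signature and body are assumptions:
def gen_parts(part_size, total_size, group_id, num_of_parts_per_group,
              total_num_of_parts):
    # Part numbers are 1-based; each group owns a contiguous run of parts.
    first = group_id * num_of_parts_per_group + 1
    last = min(first + num_of_parts_per_group - 1, total_num_of_parts)
    for part_no in range(first, last + 1):
        byte_range = ((part_no - 1) * part_size,
                      min(part_no * part_size, total_size) - 1)
        yield part_no, byte_range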
# Complete the multipart upload once all parts are copied.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint,
            dest_bucket,
            evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    # Rebuild the part list reported by the copy steps and finish the upload.
    parts = []
    for part in evt["parts"]:
        parts.append(PartInfo(part["part_no"], part["etag"]))
    dest_client.complete_multipart_upload(evt["key"], evt["upload_id"], parts)
    return {}
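# For reference, the shape oss2 expects: complete_multipart_upload takes a
# list of oss2.models.PartInfo ordered by part number. A minimal sorted
# rebuild, should the workflow ever deliver parts out of order (the input
# here is hypothetical; the handler above assumes the parts arrive ordered):
from oss2.models import PartInfo

raw_parts = [{"part_no": 2, "etag": "etag-2"}, {"part_no": 1, "etag": "etag-1"}]
parts = sorted((PartInfo(p["part_no"], p["etag"]) for p in raw_parts),
               key=lambda p: p.part_number)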
# List source objects and group them into small/medium/large work items.
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client

    has_more = False
    marker = evt["marker"]
    group_threshold = evt.get("group_threshold", 50)
    total_group_count = evt.get("total_group_count", 0)
    medium_file_limit = evt.get("medium_file_limit")
    small_file_limit = evt.get("small_file_limit")
    small_group_total = 0
    small_group = []  # Small files whose total size stays under medium_file_limit
    small = []  # A list of small_group lists
    medium = []
    large = []
    current_group_size = 0
    leave_early = False
    while True:
        result = src_client.list_objects(prefix=evt["prefix"], marker=marker,
                                         delimiter=evt["delimiter"], max_keys=500)
        logger.info("Found %d objects", len(result.object_list))
        marker = result.next_marker
        has_more = result.is_truncated
        # Each group copies at most medium_file_limit bytes; stop once this
        # listing pass would produce more than group_threshold groups.
        for i in range(0, len(result.object_list)):
            obj = result.object_list[i]
            logger.info("key: %s, size: %s, group size: %d", obj.key, obj.size,
                        current_group_size)
            if (current_group_size * medium_file_limit + small_group_total +
                    obj.size + medium_file_limit - 1) // medium_file_limit > group_threshold:
                # Leave early, overriding has_more and marker.
                has_more = True
                leave_early = True
                marker = result.object_list[i].key
                break
            if obj.size <= small_file_limit:
                # Pack as many small files as possible into one group while the
                # group's total size stays within medium_file_limit.
                if obj.size + small_group_total <= medium_file_limit:
                    small_group_total += obj.size
                    small_group.append(obj.key)
                else:
                    small.append(small_group)
                    small_group_total = obj.size
                    small_group = [obj.key]
                current_group_size += 1
            elif obj.size <= medium_file_limit:
                medium.append([obj.key, obj.size])
                current_group_size += 1
            else:
                large.append([obj.key, obj.size])
                # A large file is split across several groups, each holding up
                # to medium_file_limit bytes.
                current_group_size += (obj.size + medium_file_limit - 1) // medium_file_limit
        if not has_more or leave_early:
            break
    if len(small_group) > 0:
        small.append(small_group)
    total_group_count += (current_group_size * medium_file_limit +
                          small_group_total + medium_file_limit - 1) // medium_file_limit
    return {
        "small": small,    # [["key1","key2","key3"],["key4","key5"]]
        "medium": medium,  # [["key9",size],["key11",size]]
        "large": large,    # [["key6",size],["key7",size]]
        "has_more": has_more,
        "marker": marker,
        "total_group_count": total_group_count,
        "execution_name": evt.get("execution_name", "") + "-" +
                          re.sub(r"[^a-zA-Z0-9-_]", "_", marker)
    }
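# A worked example of the ceiling-division group estimate used twice above,
# with illustrative numbers (medium_file_limit = 128 MiB): two groups' worth
# of already-assigned work, 40 MiB of pending small files, and a 100 MiB
# candidate object round up to ceil(396 / 128) = 4 groups, so with
# group_threshold = 3 the handler would stop here and hand the remainder of
# the listing to the next execution.
MiB = 1024 * 1024
medium_file_limit = 128 * MiB
current_group_size, small_group_total, obj_size = 2, 40 * MiB, 100 * MiB
estimate = (current_group_size * medium_file_limit + small_group_total +
            obj_size + medium_file_limit - 1) // medium_file_limit
assert estimate == 4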