Example #1
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
            or src_endpoint, dest_bucket, evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    # Split the upload into groups of parts; each group is handled by one
    # function execution.
    total_num_of_parts, num_of_groups, num_of_parts_per_group = calc_groups(
        evt["total_size"], evt["part_size"], evt["medium_file_limit"])
    upload_id = dest_client.init_multipart_upload(evt["key"]).upload_id

    return {
        "upload_id": upload_id,
        "total_num_of_parts": total_num_of_parts,
        "groups": list(range(num_of_groups)),
        "num_of_parts_per_group": num_of_parts_per_group
    }
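All of these handlers lean on the same module-level scaffolding, which the examples above do not show: the imports, a clients dict that caches bucket clients across warm invocations, and a get_oss_client helper. A minimal sketch of that scaffolding, assuming the standard oss2 SDK and the Function Compute Python runtime's temporary STS credentials (the dest_access_role branch is omitted):

import json
import logging
import os
import re
import time

import oss2

clients = {}  # cache of oss2.Bucket clients, reused across warm invocations


def get_oss_client(context, endpoint, bucket, access_role=None):
    # Sketch only: authenticate with the function's temporary STS credentials.
    # A real implementation would also assume access_role via STS when set.
    creds = context.credentials
    auth = oss2.StsAuth(creds.access_key_id, creds.access_key_secret,
                        creds.security_token)
    return oss2.Bucket(auth, endpoint, bucket)

calc_groups is likewise defined elsewhere in the project. A plausible reconstruction, assuming each group covers at most medium_file_limit bytes of the object:

def calc_groups(total_size, part_size, medium_file_limit):
    # Hypothetical reconstruction: number of parts overall, and how many
    # consecutive parts fit into one group of medium_file_limit bytes.
    total_num_of_parts = (total_size + part_size - 1) // part_size
    num_of_parts_per_group = max(1, medium_file_limit // part_size)
    num_of_groups = (total_num_of_parts + num_of_parts_per_group -
                     1) // num_of_parts_per_group
    return total_num_of_parts, num_of_groups, num_of_parts_per_group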
Example #2
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
            or src_endpoint, dest_bucket, evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    # Download part of the file and upload as part
    part_no = evt["part_no"]
    part_size = evt["part_size"]
    byte_range = ((part_no - 1) * part_size,
                  min(part_no * part_size, evt["total_size"]) - 1)
    # Stream the byte range straight from the source object and upload it as
    # one part of the multipart upload, without buffering the part in /tmp.
    object_stream = src_client.get_object(evt["key"], byte_range=byte_range)
    res = dest_client.upload_part(evt["key"], evt["upload_id"], part_no,
                                  object_stream)
    return {"part_no": part_no, "etag": res.etag}
Example #3
def handler(event, context):
  logger = logging.getLogger()
  evt = json.loads(event)
  logger.info("Handling event: %s", evt)
  src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
  src_bucket = evt["src_bucket"]
  src_client = clients.get(src_bucket)
  if src_client is None:
    src_client = get_oss_client(context, src_endpoint, src_bucket)
    clients[src_bucket] = src_client
  dest_bucket = evt["dest_bucket"]
  dest_client = clients.get(dest_bucket)
  if dest_client is None:
    dest_client = get_oss_client(
        context,
        evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
        or src_endpoint, dest_bucket, evt.get("dest_access_role"))
    clients[dest_bucket] = dest_client

  crcs = evt.get("crcs")
  failed_crcs = []

  start_time = time.time()

  count = evt.get("count", 10)
  base_size = evt.get("base_size", 1024)
  def producer(queue):
    for i in range(count):
      queue.put(i)

  def consumer(queue):
    while queue.ok():
      item = queue.get()
      if item is None:
        break
      key = '%s/%d' % (evt["prefix"], item)
      result = dest_client.head_object(key)
      crc = result.headers["x-oss-hash-crc64ecma"]
      if crcs is not None:
        # Compare against the expected CRCs supplied in the event.
        if crc != str(crcs[item]):
          logger.info("expected %s, actual %s", crcs[item], crc)
          failed_crcs.append(item)
      else:
        # No expected CRCs in the event: compare with the source object's CRC.
        result = src_client.head_object(key)
        src_crc = result.headers["x-oss-hash-crc64ecma"]
        if crc != str(src_crc):
          logger.info("expected %s, actual %s", src_crc, crc)
          failed_crcs.append(item)

  task_q = TaskQueue(producer, [consumer] * 16)
  task_q.run()

  return {'failed_crcs': failed_crcs, 'success': len(failed_crcs) == 0}
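TaskQueue is a helper from the surrounding project whose implementation is not shown. A minimal sketch that satisfies the interface used above (ok(), get(), a producer callable, N consumer callables, and a None sentinel per consumer), built on queue.Queue and threads:

import queue
import threading

class TaskQueue:
    # Minimal sketch of the assumed TaskQueue interface.
    def __init__(self, producer, consumers):
        self._producer = producer
        self._consumers = consumers
        self._queue = queue.Queue()

    def ok(self):
        # Hook for aborting early; this sketch never aborts.
        return True

    def put(self, item):
        self._queue.put(item)

    def get(self):
        return self._queue.get()

    def run(self):
        threads = [threading.Thread(target=c, args=(self,))
                   for c in self._consumers]
        for t in threads:
            t.start()
        self._producer(self)       # fill the queue from the main thread
        for _ in self._consumers:
            self._queue.put(None)  # one stop sentinel per consumer
        for t in threads:
            t.join()

Since all 16 consumers append to the shared failed_crcs list, the pattern relies on list.append being atomic under CPython's GIL, so no extra locking is needed here.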
Example #4
def handler(event, context):
  logger = logging.getLogger()
  evt = json.loads(event)
  logger.info("Handling event: %s", evt)
  src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
  src_bucket = evt["src_bucket"]
  src_client = clients.get(src_bucket)
  if src_client is None:
    src_client = get_oss_client(context, src_endpoint, src_bucket)
    clients[src_bucket] = src_client
  dest_bucket = evt["dest_bucket"]
  dest_client = clients.get(dest_bucket)
  if dest_client is None:
    dest_client = get_oss_client(
        context,
        evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
        or src_endpoint, dest_bucket, evt.get("dest_access_role"))
    clients[dest_bucket] = dest_client

  copy(src_client, dest_client, evt["key"])

  return {}
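copy here is a project helper for whole-object copies. A minimal sketch, assuming it simply streams the object between buckets with the same oss2 calls the other examples use:

def copy(src_client, dest_client, key):
    # Stream the object from the source bucket into the destination bucket;
    # oss2's put_object accepts a file-like object, so nothing is buffered
    # on local disk.
    dest_client.put_object(key, src_client.get_object(key))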
Example #5
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
            or src_endpoint, dest_bucket, evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    parts = copy(gen_parts, src_client, dest_client, evt["key"],
                 evt["part_size"], evt["total_size"], evt["upload_id"],
                 evt["group_id"], evt["num_of_parts_per_group"],
                 evt["total_num_of_parts"])
    return {"parts": parts}
Example #6
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    dest_bucket = evt["dest_bucket"]
    dest_client = clients.get(dest_bucket)
    if dest_client is None:
        dest_client = get_oss_client(
            context,
            evt.get("dest_oss_endpoint") or os.environ.get('DEST_OSS_ENDPOINT')
            or src_endpoint, dest_bucket, evt.get("dest_access_role"))
        clients[dest_bucket] = dest_client

    parts = []
    for part in evt["parts"]:
        parts.append(PartInfo(part["part_no"], part["etag"]))

    dest_client.complete_multipart_upload(evt["key"], evt["upload_id"], parts)

    return {}
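PartInfo is oss2.models.PartInfo. For reference, a hypothetical input event for this completion step, matching the shapes produced by Examples #1 and #5 (all values are placeholders):

event = json.dumps({
    "dest_bucket": "my-dest-bucket",           # hypothetical bucket name
    "key": "backups/large-object.bin",         # hypothetical object key
    "upload_id": "<upload id from Example #1>",
    "parts": [
        {"part_no": 1, "etag": "<etag from Example #5>"},
        {"part_no": 2, "etag": "<etag from Example #5>"}
    ]
})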
Example #7
def handler(event, context):
    logger = logging.getLogger()
    evt = json.loads(event)
    logger.info("Handling event: %s", evt)
    src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
    src_bucket = evt["src_bucket"]
    src_client = clients.get(src_bucket)
    if src_client is None:
        src_client = get_oss_client(context, src_endpoint, src_bucket)
        clients[src_bucket] = src_client

    has_more = False
    marker = evt["marker"]
    group_threshold = evt.get("group_threshold", 50)
    total_group_count = evt.get("total_group_count", 0)
    medium_file_limit = evt.get("medium_file_limit")
    small_file_limit = evt.get("small_file_limit")
    small_group_total = 0
    small_group = []  # Small files whose total size stays below medium_file_limit
    small = []  # A list of small_group lists
    medium = []
    large = []
    current_group_size = 0
    leave_early = False

    while True:
        result = src_client.list_objects(prefix=evt["prefix"],
                                         marker=marker,
                                         delimiter=evt["delimiter"],
                                         max_keys=500)
        logger.info("Found %d objects", len(result.object_list))
        prev_marker = marker
        marker = result.next_marker
        has_more = result.is_truncated
        # Each group holds at most medium_file_limit bytes of data; one group
        # is processed by one function execution.
        for i in range(len(result.object_list)):
            obj = result.object_list[i]
            logger.info("key: %s, size: %s, group size: %d", obj.key, obj.size,
                        current_group_size)
            projected_groups = (current_group_size * medium_file_limit +
                                small_group_total + obj.size +
                                medium_file_limit - 1) // medium_file_limit
            if projected_groups > group_threshold and (
                    i > 0 or current_group_size > 0 or small_group_total > 0):
                # Leave early, overriding has_more and marker, so that one
                # invocation emits at most group_threshold groups. The
                # list_objects marker is exclusive, so resume from the last
                # processed key; using obj.key would silently skip obj. The
                # guard above also ensures each invocation processes at least
                # one object, so an oversized object cannot stall progress.
                has_more = True
                leave_early = True
                marker = result.object_list[i - 1].key if i > 0 else prev_marker
                break
            # Pack as many small files as possible into one group, keeping the
            # group's total size at or below medium_file_limit.
            if obj.size <= small_file_limit:
                if obj.size + small_group_total <= medium_file_limit:
                    small_group_total += obj.size
                    small_group.append(obj.key)
                else:
                    small.append(small_group)
                    small_group = [obj.key]
                    small_group_total = obj.size
                    current_group_size += 1
            elif obj.size <= medium_file_limit:
                medium.append([obj.key, obj.size])
                current_group_size += 1
            else:
                large.append([obj.key, obj.size])
                # A large file is split across multiple groups, each covering
                # at most medium_file_limit bytes.
                current_group_size += (obj.size + medium_file_limit -
                                       1) // medium_file_limit

        if not has_more or leave_early:
            break

    if len(small_group) > 0:
        small.append(small_group)

    total_group_count += (current_group_size * medium_file_limit +
                          small_group_total + medium_file_limit -
                          1) // medium_file_limit

    return {
        "small": small,    # [["key1","key2","key3"],["key4","key5"]]
        "medium": medium,  # [["key9",size],["key11",size]]
        "large": large,    # [["key6",size],["key7",size]]
        "has_more": has_more,
        "marker": marker,
        "total_group_count": total_group_count,
        "execution_name": evt.get("execution_name", "") + "-" +
                          re.sub(r"[^a-zA-Z0-9-_]", "_", marker)
    }