Example 1
def copy(src_client, dest_client, key, part_size, total_size):
  logger = logging.getLogger()
  logger.info("Starting to copy %s", key)
  start_time = time.time()
  upload_id = dest_client.init_multipart_upload(key).upload_id

  def producer(queue):
    # Number of parts needed to cover total_size (ceiling division).
    num_of_parts = (total_size + part_size - 1) // part_size
    for part_no in range(1, num_of_parts + 1):
      # Inclusive byte range for this part; the last part may be shorter.
      part_range = ((part_no - 1) * part_size,
                    min(part_no * part_size, total_size) - 1)
      queue.put((part_no, part_range))


  parts = []
  def consumer(queue):
    while queue.ok():
      item = queue.get()
      if item is None:  # sentinel: no more work
        break

      part_no, part_range = item
      # Download this part's byte range from the source and re-upload it.
      object_stream = src_client.get_object(key, byte_range=part_range)
      res = dest_client.upload_part(key, upload_id, part_no, object_stream)
      parts.append(PartInfo(part_no, res.etag))

  task_q = TaskQueue(producer, [consumer] * 16)
  task_q.run()

  dest_client.complete_multipart_upload(key, upload_id, parts)
  end_time = time.time()
  logger.info('Copied %s in %s secs', key, end_time-start_time)
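All six examples rely on helpers that are not shown here: get_oss_client, a module-level clients cache, PartInfo (from oss2.models), MemBuffer, StreamZipFile, and TaskQueue, plus the usual imports (logging, time, json, os, random, string, zipfile, oss2). As orientation only, here is a minimal sketch of a TaskQueue that satisfies the way these snippets call it (one producer thread filling a queue, N consumer threads draining it, None as the end-of-work sentinel); the real helper may well differ:

import threading

try:
    import queue as queue_module   # Python 3
except ImportError:
    import Queue as queue_module   # Python 2

class TaskQueue(object):
    """Sketch of the assumed helper: fan one producer out to N consumers."""

    def __init__(self, producer, consumers):
        self._producer = producer
        self._consumers = consumers
        self._queue = queue_module.Queue()
        self._ok = True

    def ok(self):
        # Consumers poll this; a real implementation would flip it to
        # False on failure so the other workers stop early.
        return self._ok

    def put(self, item):
        self._queue.put(item)

    def get(self):
        return self._queue.get()

    def run(self):
        threads = [threading.Thread(target=consumer, args=(self,))
                   for consumer in self._consumers]
        for t in threads:
            t.start()
        try:
            self._producer(self)          # fill the queue
        finally:
            for _ in threads:
                self._queue.put(None)     # one sentinel per consumer
        for t in threads:
            t.join()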
Example 2
def copy(src_client, dest_client, keys):
    logger = logging.getLogger()
    logger.info("Starting to copy %d objects", len(keys))
    start_time = time.time()

    def producer(queue):
        for key in keys:
            queue.put(key)

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break
            key = item
            object_stream = src_client.get_object(key)
            dest_client.put_object(key, object_stream)

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()

    end_time = time.time()
    logger.info('Copied %d objects in %s secs', len(keys),
                end_time - start_time)
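A hypothetical call, reusing the get_oss_client helper from the other examples (endpoint, bucket, and key names are placeholders):

src_client = get_oss_client(context, endpoint, 'example-src-bucket')
dest_client = get_oss_client(context, endpoint, 'example-dest-bucket')
copy(src_client, dest_client, ['logs/2019/01.log', 'logs/2019/02.log'])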
Example 3
def copy(gen_parts, src_client, dest_client, key, part_size, total_size,
         upload_id, group_id, num_of_parts_per_group, total_num_of_parts):
    logger = logging.getLogger()
    logger.info("Starting to copy %s, group_id %d", key, group_id)
    start_time = time.time()

    def producer(queue):
        gen_parts(queue, part_size, total_size, group_id,
                  num_of_parts_per_group, total_num_of_parts)

    parts = []

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break

            part_no, part_range = item
            logger.info("%d -> %s", part_no, part_range)
            object_stream = src_client.get_object(key, byte_range=part_range)
            res = dest_client.upload_part(key, upload_id, part_no,
                                          object_stream)
            # Plain dicts rather than PartInfo, so the caller can
            # serialize the returned parts (e.g. as JSON).
            parts.append({"part_no": part_no, "etag": res.etag})

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()

    end_time = time.time()
    logger.info('Copied %s in %s secs', key, end_time - start_time)
    return parts
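Example 3 copies only one group of parts and returns their metadata; initiating and completing the multipart upload is left to a coordinator. A hypothetical single-process coordinator (names and structure are placeholders; in practice each group would likely run as its own function invocation):

from oss2.models import PartInfo

def copy_in_groups(gen_parts, src_client, dest_client, key, part_size,
                   total_size, num_of_parts_per_group):
    total_num_of_parts = (total_size + part_size - 1) // part_size
    num_of_groups = ((total_num_of_parts + num_of_parts_per_group - 1)
                     // num_of_parts_per_group)
    upload_id = dest_client.init_multipart_upload(key).upload_id

    all_parts = []
    for group_id in range(num_of_groups):
        all_parts.extend(copy(gen_parts, src_client, dest_client, key,
                              part_size, total_size, upload_id, group_id,
                              num_of_parts_per_group, total_num_of_parts))

    # OSS expects the parts sorted by part number when completing.
    parts = [PartInfo(p["part_no"], p["etag"])
             for p in sorted(all_parts, key=lambda p: p["part_no"])]
    dest_client.complete_multipart_upload(key, upload_id, parts)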
Example 4
def zip_files(oss_client, source_dir, source_files, dest_file):
    LOG.info('create zip, source_dir: %s, source_files: %s, dest_file: %s',
             source_dir, source_files, dest_file)

    start_time = time.time()
    upload_id = oss_client.init_multipart_upload(dest_file).upload_id

    def zip_add_file(zip_file, key):
        LOG.info('add zip file: %s', key)
        if key.endswith('/'):  # skip directory placeholder objects
            return
        obj = oss_client.get_object(key)
        # Store without compression; the entry name is made relative
        # to source_dir.
        zip_file.write_file(key[len(source_dir):],
                            obj,
                            compress_type=zipfile.ZIP_STORED)

    def producer(queue):
        mem_buf = MemBuffer(queue)
        zip_file = StreamZipFile(mem_buf, 'w')

        if isinstance(source_files, list):
            for obj in source_files:
                zip_add_file(zip_file, obj)
        elif isinstance(source_dir, str):  # `basestring` on Python 2
            for obj in oss2.ObjectIterator(oss_client, prefix=source_dir):
                zip_add_file(zip_file, obj.key)
        else:
            raise ValueError(
                'either `source_files` or `source_dir` must be specified')

        zip_file.close()
        mem_buf.flush_buffer()

    parts = []

    def consumer(queue):
        while queue.ok():
            item = queue.get()
            if item is None:
                break

            part_no, part_data = item
            res = oss_client.upload_part(dest_file, upload_id, part_no,
                                         part_data)
            parts.append(PartInfo(part_no, res.etag))

    task_q = TaskQueue(producer, [consumer] * 16)
    task_q.run()

    oss_client.complete_multipart_upload(dest_file, upload_id, parts)
    end_time = time.time()
    LOG.info('create zip, cost: %s secs', end_time - start_time)

    return dest_file
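StreamZipFile and MemBuffer are custom helpers that are not shown. Judging by how they are used, MemBuffer is a writable file-like object that chops whatever the zip writer emits into numbered parts for the consumers; a minimal sketch under that assumption (the 8 MB part size is an arbitrary choice for this sketch; OSS only requires that every part except the last be at least 100 KB):

class MemBuffer(object):
    """Sketch of the assumed helper: a writable file-like object that
    chunks incoming bytes into (part_no, data) items on the task queue.
    The real implementation may differ."""

    PART_SIZE = 8 * 1024 * 1024  # arbitrary for this sketch

    def __init__(self, queue):
        self._queue = queue
        self._buf = b''
        self._part_no = 0

    def write(self, data):
        self._buf += data
        # Emit full-sized parts as soon as enough bytes accumulate.
        while len(self._buf) >= self.PART_SIZE:
            self._emit(self._buf[:self.PART_SIZE])
            self._buf = self._buf[self.PART_SIZE:]

    def flush_buffer(self):
        # Push whatever remains as the final (possibly short) part.
        if self._buf:
            self._emit(self._buf)
            self._buf = b''

    def _emit(self, data):
        self._part_no += 1
        self._queue.put((self._part_no, data))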
Example 5
def handler(event, context):
  logger = logging.getLogger()
  evt = json.loads(event)
  logger.info("Handling event: %s", evt)
  src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
  src_bucket = evt["src_bucket"]
  src_client = clients.get(src_bucket)
  if src_client is None:
    src_client = get_oss_client(context, src_endpoint, src_bucket)
    clients[src_bucket] = src_client
  dest_bucket = evt["dest_bucket"]
  dest_client = clients.get(dest_bucket)
  if dest_client is None:
    endpoint = (evt.get("dest_oss_endpoint")
                or os.environ.get('DEST_OSS_ENDPOINT') or src_endpoint)
    dest_client = get_oss_client(context, endpoint, dest_bucket,
                                 evt.get("dest_access_role"))
    clients[dest_bucket] = dest_client

  crcs = evt.get("crcs")
  failed_crcs = []

  count = evt.get("count", 10)
  def producer(queue):
    for i in range(count):
      queue.put(i)


  def consumer(queue):
    while queue.ok():
      item = queue.get()
      if item is None:  # sentinel: no more work
        break
      i = item
      key = '%s/%d' % (evt["prefix"], i)
      result = dest_client.head_object(key)
      crc = result.headers["x-oss-hash-crc64ecma"]
      if crcs is not None:
        # Compare against the expected CRCs supplied in the event.
        if crc != str(crcs[i]):
          logger.info("expected %s, actual %s", crcs[i], crc)
          failed_crcs.append(i)
      else:
        # No expected CRCs supplied: compare with the source object's CRC.
        result = src_client.head_object(key)
        src_crc = result.headers["x-oss-hash-crc64ecma"]
        if crc != str(src_crc):
          logger.info("expected %s, actual %s", src_crc, crc)
          failed_crcs.append(i)

  task_q = TaskQueue(producer, [consumer] * 16)
  task_q.run()

  return {'failed_crcs': failed_crcs, 'success': len(failed_crcs) == 0}
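Collecting the evt[...] and evt.get(...) reads above, a hypothetical event for this verifier looks like the following; crcs (a list of count expected CRC64 values, e.g. Example 6's output), dest_oss_endpoint, and dest_access_role are optional, and without crcs each object is compared against its source counterpart:

{
  "src_bucket": "example-src-bucket",
  "dest_bucket": "example-dest-bucket",
  "prefix": "bench",
  "count": 10
}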
Example 6
def handler(event, context):
  logger = logging.getLogger()
  evt = json.loads(event)
  logger.info("Handling event: %s", evt)
  src_endpoint = 'https://oss-%s-internal.aliyuncs.com' % context.region
  # This generator writes test objects into the bucket that later acts
  # as the copy source, hence it reads "src_bucket" from the event.
  dest_client = get_oss_client(context, src_endpoint, evt["src_bucket"])

  start_time = time.time()

  count = evt.get("count", 10)
  base_size = evt.get("base_size", 1024)
  def producer(queue):
    for i in range(count):
      queue.put(i)


  crcs = [None] * count
  def consumer(queue):
    while queue.ok():
      item = queue.get()
      if item is None:  # sentinel: no more work
        break
      i = item
      # Object size in base_size units: drawn from N(64, 64), with
      # non-positive draws set to 1 and values capped at 128.
      size = random.gauss(64, 64)
      size = size if size > 0 else 1
      size = size if size < 128 else 128
      # Use `_` as the loop variable: on Python 2 a list comprehension
      # leaks its variable, which would clobber the part index `i`.
      chars = ''.join(random.choice(string.printable)
                      for _ in range(int(size * base_size)))
      key = '%s/%d' % (evt["prefix"], i)
      result = dest_client.put_object(key, chars)
      crcs[i] = result.crc

  task_q = TaskQueue(producer, [consumer] * 16)
  task_q.run()

  end_time = time.time()
  logger.info('Saved %d objects in %s secs', len(crcs), end_time-start_time)

  return {"crcs": crcs}