Code Example #1

# Imports shared by the examples in this section. Timer, S3RequestState,
# JobEvents, AWSV4Signer, fmt_reqid_log, the session/job types and the
# module-level _logger are helpers from the surrounding package and are
# assumed to be importable alongside these classes.
import base64
import json
import re
import sys
import urllib.parse
import uuid
from xml.etree.ElementTree import fromstring

import aiohttp
from yarl import URL

class S3AsyncGetObject:
    def __init__(self, session, request_id,
                 bucket_name, object_name,
                 object_size, offset,
                 length):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name
        self._object_size = object_size
        self._range_read_offset = offset
        self._range_read_length = length

        self.remote_down = False
        self._http_status = None
        self._object_range = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for GET Object operation."""
        return self._timer.elapsed_time_ms()

    def get_etag(self):
        """Returns ETag for object."""
        return self._response_headers["ETag"].strip("\"")

    def get_content_length(self):
        """Get content length."""
        return int(self._response_headers["Content-Length"])

    def get_total_object_range(self):
        """Return total bytes received for the object so far."""
        return self._object_range

    # yields data chunk for given size
    async def fetch(self, chunk_size):
        request_uri = AWSV4Signer.fmt_s3_request_uri(
            self._bucket_name, self._object_name)

        query_params = ""
        body = ""

        # check for range read request
        if self._range_read_length >= 0:
            # Ranged GET: the byte range is inclusive on both ends,
            # so the expected payload is (end - start) + 1 bytes.
            start_bytes = self._range_read_offset
            end_bytes = self._range_read_offset + self._range_read_length
            object_range = "bytes=" + str(start_bytes) + "-" + str(end_bytes)
            total_to_fetch = (end_bytes - start_bytes) + 1
        else:
            # get object
            object_range = None
            total_to_fetch = self._object_size

        headers = AWSV4Signer(
            self._session.endpoint,
            self._session.service_name,
            self._session.region,
            self._session.access_key,
            self._session.secret_key).prepare_signed_header(
            'GET',
            request_uri,
            query_params,
            body,
            object_range)

        if (headers['Authorization'] is None):
            self._logger.error(fmt_reqid_log(self._request_id) +
                               "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(fmt_reqid_log(self._request_id) +
                          'GET on {}'.format(
                              self._session.endpoint + request_uri))
        self._logger.debug(fmt_reqid_log(self._request_id) +
                           "GET with headers {}".format(headers))
        self._timer.start()
        try:
            async with self._session.get_client_session().get(
                    self._session.endpoint + request_uri,
                    headers=headers) as resp:
                self._logger.debug(
                    fmt_reqid_log(self._request_id) +
                    "Received response {}".format(resp))
                self._http_status = resp.status
                self._response_headers = resp.headers

                if object_range is None:
                    if resp.status == 200:
                        # get object successful with 200 status code
                        self._logger.info(
                            fmt_reqid_log(self._request_id) +
                            'GET Object completed with http status: {}'.format(
                                resp.status))
                    else:
                        self._state = S3RequestState.FAILED
                        error_msg = await resp.text()
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            'GET Object failed with http status: {}'.
                            format(resp.status) +
                            '\nError Response: {}'.format(error_msg))
                        return
                else:
                    if resp.status == 206:
                        # get object range read successful with 206 status code
                        self._logger.info(fmt_reqid_log(
                            self._request_id) + 'GET object range read '
                            'completed with http status: {}'.format(
                            resp.status))
                    else:
                        self._state = S3RequestState.FAILED
                        error_msg = await resp.text()
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            'GET object range read failed '
                            'with http status: {}'. format(resp.status)
                            + ' Error Response: {}'.format(error_msg))
                        return

                self._state = S3RequestState.RUNNING
                self._object_range = 0  # total bytes received so far
                while True:
                    # If abort requested, stop the loop and return.
                    if self._state == S3RequestState.ABORTED:
                        self._logger.debug(
                            fmt_reqid_log(self._request_id) +
                            "Aborted after reading %d bytes "
                            "for object size of %d",
                            self._object_size - total_to_fetch,
                            self._object_size)
                        break

                    data_chunk = await resp.content.read(chunk_size)
                    if not data_chunk:
                        break
                    # Accumulate the total number of bytes received.
                    self._object_range += len(data_chunk)
                    self._logger.debug(
                        fmt_reqid_log(self._request_id) +
                        "Received data_chunk of size {} bytes.".format(
                            len(data_chunk)))
                    yield data_chunk

                    total_to_fetch = total_to_fetch - len(data_chunk)
                    if total_to_fetch == 0:
                        # Completed reading all expected data.
                        self._state = S3RequestState.COMPLETED
                        break
                    elif total_to_fetch < 0:
                        self._state = S3RequestState.FAILED
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            "Received %d more bytes than "
                            "expected object size of %d",
                            -total_to_fetch,
                            self._object_size)
                        break
                # end of While True

                if self._state != S3RequestState.ABORTED:
                    if total_to_fetch > 0:
                        self._state = S3RequestState.FAILED
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            "Received partial object."
                            "Expected object size (%d), "
                            "Actual received size (%d)",
                            self._object_size,
                            self._object_size - total_to_fetch)
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(fmt_reqid_log(self._request_id) +
                               "Failed to connect to S3: " + str(e))
        self._timer.stop()
        return

    def pause(self):
        self._state = S3RequestState.PAUSED
        # XXX Take real pause action

    def resume(self):
        self._state = S3RequestState.RUNNING
        # XXX Take real resume action

    def abort(self):
        self._state = S3RequestState.ABORTED
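
A minimal usage sketch for the reader above. The session argument is assumed to be the package's session object (exposing the endpoint, credentials, logger and get_client_session() used by the class); the bucket and object names are placeholders.

import asyncio

async def download_object(session):
    # length=-1 requests the whole object rather than a byte range.
    reader = S3AsyncGetObject(session, "req-1", "my-bucket", "my-object",
                              object_size=1024, offset=0, length=-1)
    received = b""
    # fetch() is an async generator; iterate it to drain the body.
    async for chunk in reader.fetch(chunk_size=256):
        received += chunk
    if reader.get_state() == S3RequestState.COMPLETED:
        print("Downloaded {} bytes in {} ms".format(
            len(received), reader.get_execution_time()))

# asyncio.run(download_object(session))  # session supplied by the caller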
Code Example #2
class ObjectReplicator:
    def __init__(self, job, transfer_chunk_size_bytes, range_read_offset,
                 range_read_length, source_session, target_session) -> None:
        """Initialise."""
        self._transfer_chunk_size_bytes = transfer_chunk_size_bytes
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._range_read_offset = range_read_offset
        self._range_read_length = range_read_length

        # A set of observers to watch for various notifications.
        # To start with: job completed (success/failure).
        self._observers = {}

        self._s3_source_session = source_session

        self._object_source_reader = S3AsyncGetObject(
            self._s3_source_session, self._request_id,
            job.get_source_bucket_name(), job.get_source_object_name(),
            int(job.get_source_object_size()), self._range_read_offset,
            self._range_read_length)

        self._source_replication_status = S3AsyncUpdatereplicationStatus(
            self._s3_source_session, self._request_id,
            job.get_source_owner_account_id(), job.get_source_bucket_name(),
            job.get_source_object_name())

        # Setup target site info
        self._s3_target_session = target_session

        self._object_writer = S3AsyncPutObject(
            self._s3_target_session, self._request_id,
            job.get_target_bucket_name(), job.get_source_object_name(),
            int(job.get_source_object_size()))

        self._object_target_reader = S3AsyncGetObject(
            self._s3_target_session, self._request_id,
            job.get_target_bucket_name(), job.get_source_object_name(),
            int(job.get_source_object_size()), self._range_read_offset,
            self._range_read_length)

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        self._observers[label] = observer

    async def start(self):
        # Start transfer
        self._timer.start()
        await self._object_writer.send(self._object_source_reader,
                                       self._transfer_chunk_size_bytes)
        self._timer.stop()
        _logger.info("Replication completed in {}ms for job_id {}".format(
            self._timer.elapsed_time_ms(), self._job_id))

        # notify job state events
        writer_state = self._object_writer.get_state()
        if writer_state == S3RequestState.PAUSED:
            job_event = JobEvents.STOPPED
        elif writer_state == S3RequestState.ABORTED:
            job_event = JobEvents.ABORTED
        else:
            job_event = JobEvents.COMPLETED

        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            await observer.notify(job_event, self._job_id)

        if job_event == JobEvents.COMPLETED:
            await self._source_replication_status.update('COMPLETED')

            source_etag = self._object_source_reader.get_etag()
            target_etag = self._object_writer.get_etag()

            _logger.info("MD5 : Source {} and Target {}".format(
                source_etag, target_etag))

            # check md5 of source and replicated objects at target
            if source_etag == target_etag:
                _logger.info("MD5 matched for job_id {}".format(self._job_id))
            else:
                _logger.error("MD5 not matched for job_id {}".format(
                    self._job_id))

            # check content length of source and target objects
            # [system-defined metadata]
            reader_generator = self._object_target_reader.fetch(
                self._transfer_chunk_size_bytes)
            async for _ in reader_generator:
                pass

            source_content_length = \
                self._object_source_reader.get_content_length()
            target_content_length = \
                self._object_target_reader.get_content_length()

            _logger.info("Content Length : Source {} and Target {}".format(
                source_content_length, target_content_length))

            if source_content_length == target_content_length:
                _logger.info("Content length matched for job_id {}".format(
                    self._job_id))
            else:
                _logger.error(
                    "Content length not matched for job_id {}".format(
                        self._job_id))

    def pause(self):
        """Pause the running object tranfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object tranfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object tranfer."""
        self._object_writer.abort()
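
A sketch of wiring the replicator to an observer. The only contract start() relies on is an async notify(event, job_id) method; the job and session objects come from the surrounding package and are placeholders here.

class PrintingObserver:
    # Hypothetical observer; anything with an async notify() works.
    async def notify(self, event, job_id):
        print("job {} ended with event {}".format(job_id, event))

async def run_replication(job, source_session, target_session):
    replicator = ObjectReplicator(job, 4096, 0, -1,
                                  source_session, target_session)
    replicator.setup_observers("printer", PrintingObserver())
    await replicator.start()
    print("took {} ms".format(replicator.get_execution_time()))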
Code Example #3
class S3AsyncGetObjectTagging:
    def __init__(self, session, request_id, bucket_name, object_name):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name

        self._remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for GET operation."""
        return self._timer.elapsed_time_ms()

    def get_tags_count(self):
        """Returns tags count."""
        return len(self._response_tags_dict)

    def get_tags_dict(self):
        """Returns tags dictionary."""
        return self._response_tags_dict

    def get_tags_value(self, key):
        """Returns the value for the given key."""
        self._resp_tags_value = self._response_tags_dict.get(key, None)
        return self._resp_tags_value

    async def fetch(self):
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)

        query_params = urllib.parse.urlencode({'tagging': None})
        body = ""
        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'GET', request_uri, query_params, body)

        if (headers['Authorization'] is None):
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'GET on {}'.format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "GET Request Header {}".format(headers))

        self._timer.start()
        try:
            async with self._session.get_client_session().get(
                    self._session.endpoint + request_uri,
                    params=query_params,
                    headers=headers) as resp:

                self._logger.info(
                    fmt_reqid_log(self._request_id) +
                    'GET response received with' +
                    ' status code: {}'.format(resp.status))
                self._logger.info(
                    'Response url {}'.format(self._session.endpoint +
                                             request_uri))

                if resp.status == 200:
                    self._response_headers = resp.headers
                    received_tagset = await resp.text()
                    self._logger.info(
                        "Received tagset {}".format(received_tagset))

                    # Strip the XML namespace declaration with a regular
                    # expression so tags can be addressed without a prefix.
                    received_tagset = re.sub('xmlns="[^"]+"', '',
                                             received_tagset)

                    # Parse XML response
                    root = fromstring(received_tagset)

                    tags_dict = {}
                    # Find all Tag elements in the entire tree.
                    for ele in root.findall(".//Tag"):
                        key = ele.find('Key').text
                        value = ele.find('Value').text
                        tags_dict[key] = value
                    self._response_tags_dict = tags_dict
                    self._state = S3RequestState.COMPLETED

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'GET failed with http status: {}'.format(resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        self._timer.stop()
        return
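
A sketch of driving the tag fetch, under the same session assumption as above:

async def print_object_tags(session):
    tag_reader = S3AsyncGetObjectTagging(session, "req-2",
                                         "my-bucket", "my-object")
    await tag_reader.fetch()
    if tag_reader.get_state() == S3RequestState.COMPLETED:
        # Tags are exposed as a plain dict parsed from the XML TagSet.
        for key, value in tag_reader.get_tags_dict().items():
            print("{} = {}".format(key, value))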
Code Example #4
class S3AsyncCompleteMultipartUpload:
    def __init__(self, session, request_id, bucket_name, object_name,
                 upload_id, etag_dict):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name

        self._upload_id = upload_id
        self._etag_dict = etag_dict

        self._remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_response_header(self, header_key):
        """Returns response http header value."""
        self._resp_header_key = self._response_headers.get(header_key, None)
        return self._resp_header_key

    def get_execution_time(self):
        """Return total time for GET operation."""
        return self._timer.elapsed_time_ms()

    def get_final_etag(self):
        """Returns final etag after multipart completion."""
        return self._final_etag

    async def complete_upload(self):
        self._state = S3RequestState.RUNNING
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)
        query_params = urllib.parse.urlencode({'uploadId': self._upload_id})
        body = ""

        # Prepare xml format
        etag_str = "<CompleteMultipartUpload>"
        for part, etag in self._etag_dict.items():
            etag_str += "<Part><ETag>" + \
                str(etag) + "</ETag><PartNumber>" + str(part) + "</PartNumber></Part>"
        etag_str += "</CompleteMultipartUpload>"

        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'POST', request_uri, query_params, body)

        # check the header signature
        if (headers['Authorization'] is None):
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'POST on {}'.format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "POST Request Header {}".format(headers))

        self._timer.start()
        try:
            async with self._session.get_client_session().post(
                    self._session.endpoint + request_uri,
                    data=etag_str,
                    params=query_params,
                    headers=headers) as resp:

                self._logger.info(
                    fmt_reqid_log(self._request_id) +
                    'POST response received with' +
                    ' status code: {}'.format(resp.status))
                self._logger.info(
                    'Response url {}'.format(self._session.endpoint +
                                             request_uri))

                if resp.status == 200:
                    self._state = S3RequestState.COMPLETED
                    # Get the response header and body
                    self._response_headers = resp.headers
                    self._logger.info('Response headers {}'.format(
                        self._response_headers))

                    # Response body
                    resp_body = await resp.text()

                    # Remove the namespace from response body elements
                    resp_body = re.sub('xmlns="[^"]+"', '', resp_body)
                    xml_dict = fromstring(resp_body)

                    # Get the ETag from response body
                    self._final_etag = xml_dict.find('ETag').text

                else:
                    # show the error messages
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'POST failed with http status: {}'.format(resp.status)
                        + ' Error Response: {}'.format(error_msg))
                    return

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        self._timer.stop()
        return
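
The etag_dict argument maps part numbers to the per-part ETags returned while uploading, and complete_upload() serialises one <Part> element per entry. A sketch with placeholder ETag values:

async def finish_multipart_upload(session, upload_id):
    # Part number -> ETag returned by the corresponding upload-part request.
    etag_dict = {1: "etag-of-part-1", 2: "etag-of-part-2"}
    completer = S3AsyncCompleteMultipartUpload(
        session, "req-3", "my-bucket", "my-object", upload_id, etag_dict)
    await completer.complete_upload()
    if completer.get_state() == S3RequestState.COMPLETED:
        print("final ETag: {}".format(completer.get_final_etag()))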
Code Example #5
class ReplicationManager:
    def __init__(self, manager_endpoint):
        """Initialise ReplicationManager object."""
        # Id generated locally.
        self.id = str(uuid.uuid4())
        self.endpoint = manager_endpoint
        # Id returned by the remote replication manager after subscribe.
        self.subscriber_id = None
        self.client_session = aiohttp.ClientSession()

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    async def close(self):
        await self.client_session.close()

    def get_dictionary(self):
        return {
            "id": self.id,
            "endpoint": self.endpoint,
            "subscriber_id": self.subscriber_id
        }

    async def subscribe(self, replicator_endpoint, prefetch_count):
        """Subscribe to remote replication manager for jobs.

        Args
        -----
            replicator_endpoint (str): url for replicator (current process).
            prefetch_count (int): maximum count of jobs to receive from
            replication manager.

        Returns
        -------
            bool: True when subscribed successfully, False when failed.
        """
        subscriber_payload = subscribe_payload_template()

        subscriber_payload.pop("id")  # replication manager will generate.
        subscriber_payload["endpoint"] = replicator_endpoint
        subscriber_payload["prefetch_count"] = prefetch_count

        resource_url = url_with_resources(self.endpoint, ["subscribers"])
        req_id = str(uuid.uuid4())
        _logger.info(fmt_reqid_log(req_id) + "POST on {}".format(resource_url))
        self._timer.start()
        try:
            self._state = S3RequestState.RUNNING
            async with self.client_session.post(
                    resource_url, json=subscriber_payload) as response:
                self._timer.stop()

                _logger.info(
                    fmt_reqid_log(req_id) +
                    'HTTP Response: Status: {}'.format(response.status))

                if response.status == 201:  # CREATED
                    # Subscribed successfully.
                    self._state = S3RequestState.COMPLETED

                    response_body = await response.json()
                    _logger.debug(
                        fmt_reqid_log(req_id) +
                        'HTTP Response: Body: {}'.format(response_body))

                    self.subscriber_id = response_body["id"]
                else:
                    # Failed to Subscribe.
                    self._state = S3RequestState.FAILED
                    _logger.error(
                        fmt_reqid_log(req_id) + "Failed to Subscribe.")
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self._state = S3RequestState.FAILED
            _logger.error(
                fmt_reqid_log(req_id) +
                "Failed to connect to Replication Manager: " + str(e))
        return self._state == S3RequestState.COMPLETED

    # PUT the job status update to the replication manager.
    async def send_update(self, job_id, status):
        """Updates replication manager with job status.

        Args
        -----
            job_id (str): Job ID at the replication manager.
            status (str): completed/failed/aborted.

        Returns
        -------
            bool: True when status updated successfully, False when failed.
        """
        headers = {"Content-Type": "application/json"}
        payload = {"status": status}

        resource_url = url_with_resources(self.endpoint, ["jobs", job_id])
        req_id = str(uuid.uuid4())

        _logger.info(fmt_reqid_log(req_id) + 'PUT on {}'.format(resource_url))
        _logger.debug(
            fmt_reqid_log(req_id) + "PUT with headers {}".format(headers))
        _logger.debug(fmt_reqid_log(req_id) + "PUT content {}".format(payload))

        self._timer.start()
        try:
            self._state = S3RequestState.RUNNING
            async with self.client_session.put(resource_url,
                                               headers=headers,
                                               json=payload) as resp:
                self._timer.stop()

                self._response_headers = resp.headers

                self.http_status = resp.status
                self.response = await resp.json()

                _logger.info(
                    fmt_reqid_log(req_id) +
                    'PUT on {} returned http status: {}'.format(
                        resource_url, resp.status))

                if resp.status == 200:
                    self._state = S3RequestState.COMPLETED
                    _logger.info(
                        fmt_reqid_log(req_id) +
                        'PUT on {} returned Response: {}'.format(
                            resource_url, self.response))
                else:
                    self._state = S3RequestState.FAILED
                    _logger.error(
                        fmt_reqid_log(req_id) +
                        'PUT on {} returned Response: {}'.format(
                            resource_url, self.response))

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self._state = S3RequestState.FAILED
            self.remote_down = True
            _logger.error('Failed to connect to Replication manager: ' +
                          str(e))

        return self._state == S3RequestState.COMPLETED
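
A sketch of the manager lifecycle: subscribe once, report job status as jobs finish, and close the underlying aiohttp session when done. The endpoints are placeholders:

async def report_job_status(job_id):
    manager = ReplicationManager("http://manager.example:8080")
    try:
        subscribed = await manager.subscribe(
            "http://replicator.example:8081", prefetch_count=10)
        if subscribed:
            await manager.send_update(job_id, "completed")
    finally:
        await manager.close()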
Code Example #6
class S3AsyncPutObjectTagging:
    def __init__(self, session, request_id, bucket_name, object_name,
                 obj_tag_set):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name

        self._tag_set = obj_tag_set

        self._remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for GET operation."""
        return self._timer.elapsed_time_ms()

    async def send(self):
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)
        query_params = urllib.parse.urlencode({'tagging': ''})
        body = ""

        # Prepare tag xml format
        tag_str1 = "<Tagging><TagSet>"
        tag_str2 = "</TagSet></Tagging>"
        result = ""
        for key, val in self._tag_set.items():
            # Keys and values are assumed to be already XML-safe.
            result += ("<Tag><Key>" + key + "</Key><Value>" +
                       val + "</Value></Tag>")

        tagset = tag_str1 + result + tag_str2

        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'PUT', request_uri, query_params, body)

        if (headers['Authorization'] is None):
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'PUT on {}'.format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "PUT Request Header {}".format(headers))

        self._timer.start()
        try:
            async with self._session.get_client_session().put(
                    self._session.endpoint + request_uri,
                    data=tagset,
                    params=query_params,
                    headers=headers) as resp:

                self._logger.info(
                    fmt_reqid_log(self._request_id) +
                    'PUT response received with' +
                    ' status code: {}'.format(resp.status))
                self._logger.info(
                    'Response url {}'.format(self._session.endpoint +
                                             request_uri))

                if resp.status == 200:
                    self._state = S3RequestState.COMPLETED
                    self._response_headers = resp.headers
                    self._logger.info('Response headers {}'.format(
                        self._response_headers))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'PUT failed with http status: {}'.format(resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        self._timer.stop()
        return
Code Example #7
class S3AsyncUpdatereplicationStatus:
    def __init__(self, session, request_id, account_id, bucket_name,
                 object_name):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._account_id = account_id
        self._bucket_name = bucket_name
        self._object_name = object_name

        self._remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

        self._bucket_metadata_index_id = "AAAAAAAAAHg=-AgAQAAAAAAA="

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for HEAD Object operation."""
        return self._timer.elapsed_time_ms()

    def kv_session(self, index, key, value=None):
        """Set up connection context for admin KV store API."""
        canonical_uri = '/indexes/{}/{}'.format(
            urllib.parse.quote(index, safe=""), urllib.parse.quote(key))
        request_uri = self._session.admin_endpoint + canonical_uri

        query_params = ""
        body = value or ""
        headers = AWSV4Signer(self._session.admin_endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'GET' if value is None else 'PUT',
                                  canonical_uri, query_params, body)

        if (headers['Authorization'] is None):
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'Motr index operation on {} {}'.format(request_uri, body))

        if value is None:
            # Called without a new value, assumed to be an HTTP GET
            return self._session.get_client_session().get(URL(request_uri,
                                                              encoded=True),
                                                          params=query_params,
                                                          headers=headers)
        else:
            # Going to PUT the new value
            return self._session.get_client_session().put(URL(request_uri,
                                                              encoded=True),
                                                          params=query_params,
                                                          headers=headers,
                                                          data=body.encode())

    async def update(self, status):
        """Use KV store admin API to update x-amz-replication-status."""
        self._timer.start()
        self._state = S3RequestState.RUNNING

        try:
            # After integration with service account,
            # this might become mandatory. Skip for now to
            # avoid breaking existing code.
            if self._session.admin_endpoint is None:
                self._logger.warning(
                    fmt_reqid_log(self._request_id) +
                    'Admin API not configured, ' +
                    'skipping source metadata update')
                self._state = S3RequestState.COMPLETED
                return

            # Step 1. Get bucket metadata
            # This is needed to figure out the Motr index holding
            # object metadata for this bucket.
            async with self.kv_session(
                    self._bucket_metadata_index_id,
                    # The key in the bucket index is of the form
                    #     <account-id>/<bucket-name>
                    self._account_id + '/' + self._bucket_name) as resp:

                if resp.status == 200:
                    bucket_metadata = await resp.json(content_type=None)

                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        'Bucket index lookup for {} response'.format(
                            self._bucket_name) +
                        ' received with status code: {}'.format(resp.status))
                    self._logger.debug(
                        'bucket metadata: {}'.format(bucket_metadata))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Index operation failed with http status: {}'.format(
                            resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

            # Magic part: the object list index layout seems to be a
            # base64 encoded memory dump of a C struct. We first decode,
            # then slice the high and low 64 bit integer values of the
            # Motr index ID we want. The server expects this 128 bit ID
            # as base64 encoded halves separated by a dash, like
            #     AAAAAAAAAHg=-AgAQAAAAAAA=
            layout = base64.b64decode(
                bucket_metadata['motr_object_list_index_layout'])
            id_hi = base64.b64encode(layout[0:8]).decode()
            id_lo = base64.b64encode(layout[8:16]).decode()
            metadata_index = id_hi + '-' + id_lo

            # Step 2. GET object metadata
            async with self.kv_session(metadata_index,
                                       self._object_name) as resp:

                if resp.status == 200:
                    object_metadata = await resp.json(content_type=None)
                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        'Object index lookup for {} in {}'.format(
                            self._object_name, metadata_index) +
                        ' response received with' +
                        ' status code: {}'.format(resp.status))
                    self._logger.debug(
                        'object metadata: {}'.format(object_metadata))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Index operation failed with http status: {}'.format(
                            resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

            # Step 3. Set replication status to the provided value
            object_metadata['x-amz-replication-status'] = status

            # Step 4. PUT updated object metadata
            async with self.kv_session(metadata_index, self._object_name,
                                       json.dumps(object_metadata)) as resp:

                if resp.status == 200:
                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        'Set x-amz-replication-status for ' +
                        '{} to {}, response received with'.format(
                            self._object_name, status) +
                        ' status code: {}'.format(resp.status))
                    self._logger.debug(
                        'updated object metadata: {}'.format(object_metadata))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Index operation failed with http status: {}'.format(
                            resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

            self._state = S3RequestState.COMPLETED

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        finally:
            self._timer.stop()

    def pause(self):
        self._state = S3RequestState.PAUSED
        # XXX Take real pause action

    def resume(self):
        self._state = S3RequestState.RUNNING
        # XXX Take real resume action

    def abort(self):
        self._state = S3RequestState.ABORTED
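
The index-ID derivation inside update() can be demonstrated on its own: the bucket metadata carries a base64-encoded 16-byte index layout, and the Motr index ID is its two 8-byte halves re-encoded and joined with a dash. A runnable sketch using an arbitrary 16-byte value rather than a real layout:

import base64

def motr_index_id(layout_b64):
    # Decode the 16-byte layout, then re-encode the high and low
    # 8-byte halves separately.
    layout = base64.b64decode(layout_b64)
    id_hi = base64.b64encode(layout[0:8]).decode()
    id_lo = base64.b64encode(layout[8:16]).decode()
    return id_hi + "-" + id_lo

fake_layout = base64.b64encode(bytes(range(16))).decode()
print(motr_index_id(fake_layout))  # AAECAwQFBgc=-CAkKCwwNDg8=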
Code Example #8
class S3AsyncHeadObject:
    def __init__(self, session, request_id, bucket_name, object_name,
                 version_id):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name

        self._version_id = version_id

        self.remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_accept_ranges(self):
        """Get range of bytes for object.

        Returns
        -------
            [str]: indicates that a range of bytes was specified for object.
        """
        self._resp_accept_range = self._response_headers.get(
            "Accept-Ranges", None)
        return self._resp_accept_range

    def get_cache_control(self):
        """Get caching behavior for object.

        Returns
        -------
            [str]: if set, returns cache policy
            and maximum age before expiring.
        """
        self._resp_cache_control = self._response_headers.get(
            "Cache-Control", None)
        return self._resp_cache_control

    def get_content_disposition(self):
        """Get presentational information for object.

        Returns
        -------
            [str]: attached filename/information for object.
        """
        self._resp_content_disposition = self._response_headers.get(
            "Content-Disposition", None)
        return self._resp_content_disposition

    def get_content_encoding(self):
        """Get content encodings for object.

        Returns
        -------
            [str]: specifies content encodings applied
            to object.
        """
        self._resp_content_encoding = self._response_headers.get(
            "Content-Encoding", None)
        return self._resp_content_encoding

    def get_content_language(self):
        """Get content language for object.

        Returns
        -------
            [str]: specify language the object content is in.
        """
        self._resp_content_lang = self._response_headers.get(
            "Content-Language", None)
        return self._resp_content_lang

    def get_content_length(self):
        """Get content length of object.

        Returns
        -------
            [int]: total content length of object.
        """
        self._resp_content_length = self._response_headers.get(
            "Content-Length", None)
        if self._resp_content_length is not None:
            self._resp_content_length = int(self._resp_content_length)
        return self._resp_content_length

    def get_content_type(self):
        """Get content type for object.

        Returns
        -------
            [str]: format of object data.
        """
        self._resp_content_type = self._response_headers.get(
            "Content-Type", None)
        return self._resp_content_type

    def get_etag(self):
        """Get etag for object.

        Returns
        -------
            [str]: opaque identifier.
        """
        self._resp_etag = self._response_headers.get("ETag", None)
        return self._resp_etag

    def get_expires(self):
        """Get date and time for object.

        Returns
        -------
            [str]: date and time at which the object is no longer cacheable.
        """
        self._resp_expires = self._response_headers.get("Expires", None)
        return self._resp_expires

    def get_last_modified(self):
        """Get last modified date of object.

        Returns
        -------
            [str]: date and time the object was last modified.
        """
        """
        self._resp_last_modified = self._response_headers.get(
            "Last-Modified", None)
        return self._resp_last_modified

    def get_server_name(self):
        """Get server name.

        Returns
        -------
            [str]: server name (SeagateS3 / AmazonS3).
        """
        self._resp_server_name = self._response_headers.get("Server", None)
        return self._resp_server_name

    def get_x_amz_archive_status(self):
        """Get archive state of the object.

        Returns
        -------
            [str]: archive state (ARCHIVE_ACCESS / DEEP_ARCHIVE_ACCESS)
        """
        self._resp_archive_status = self._response_headers.get(
            "x-amz-archive-status", None)
        return self._resp_archive_status

    def get_x_amz_delete_marker(self):
        """Get delete marker status for object.

        Returns
        -------
            [bool]: True if object retrieved was a Delete Marker, else False.
        """
        self._resp_delete_marker = self._response_headers.get(
            "x-amz-delete-marker", None)
        if self._resp_delete_marker is not None:
            # Header value is the string "true"/"false"; bool() on any
            # non-empty string is True, so compare explicitly.
            self._resp_delete_marker = \
                self._resp_delete_marker.lower() == "true"
        return self._resp_delete_marker

    def get_x_amz_expiration(self):
        """Get expiration configuration of object.

        Returns
        -------
            [str]: expiry date and rule-id, if enabled.
        """
        self._resp_expiration = self._response_headers.get(
            "x-amz-expiration", None)
        return self._resp_expiration

    def get_x_amz_missing_meta(self):
        """Get missing metadata entries of object.

        Returns
        -------
            [int]: value of the number of unprintable metadata entries.
        """
        self._resp_missing_data = self._response_headers.get(
            "x-amz-missing-meta", None)
        if self._resp_missing_data is not None:
            self._resp_missing_data = int(self._resp_missing_data)
        return self._resp_missing_data

    def get_x_amz_mp_parts_count(self):
        """Get part counts of object.

        Returns
        -------
            [int]: total part count of an object.
        """
        self._resp_parts_count = self._response_headers.get(
            "x-amz-mp-parts-count", None)
        if self._resp_parts_count is not None:
            self._resp_parts_count = int(self._resp_parts_count)
        return self._resp_parts_count

    def get_x_amz_object_lock_legal_hold(self):
        """Get legal hold status value for the object.

        Returns
        -------
            [str]: ON if a legal hold is in effect for the object, else OFF.
        """
        self._resp_legal_hold = self._response_headers.get(
            "x-amz-object-lock-legal-hold", None)
        return self._resp_legal_hold

    def get_x_amz_object_lock_mode(self):
        """Get lock mode of object.

        Returns
        -------
            [str]: Valid response values - GOVERNANCE / COMPLIANCE.
        """
        self._resp_lock_mode = self._response_headers.get(
            "x-amz-object-lock-mode", None)
        return self._resp_lock_mode

    def get_x_amz_object_lock_retain_until_date(self):
        """Get date and time retention period expires of object.

        Returns
        -------
            [str]: date and time when retention period expires.
        """
        self._resp_lock_retention = self._response_headers.get(
            "x-amz-object-lock-retain-until-date", None)
        return self._resp_lock_retention

    def get_x_amz_replication_status(self):
        """Get replication status of object.

        Returns
        -------
            [str]: valid response values - PENDING, COMPLETED
            or FAILED indicating object replication status.
        """
        self._resp_replication_status = self._response_headers.get(
            "x-amz-replication-status", None)
        return self._resp_replication_status

    def get_x_amz_request_charged(self):
        """Get requester value of object.

        Returns
        -------
            [str]: Requester of an object.
        """
        self._resp_charged = self._response_headers.get(
            "x-amz-request-charged", None)
        return self._resp_charged

    def get_x_amz_request_id(self):
        """Get request id of object.

        Returns
        -------
            [str]: specific request id.
        """
        self._resp_id = self._response_headers.get("x-amz-request-id", None)
        return self._resp_id

    def get_x_amz_restore(self):
        """Get the date when the restored copy expires.

        Returns
        -------
            [str]: ongoing-request and expiry-date of archived object.
        """
        self._resp_restore = self._response_headers.get("x-amz-restore", None)
        return self._resp_restore

    def get_x_amz_server_side_encryption(self):
        """Get aws kms or encryption key for object.

        Returns
        -------
            [str]: aws:kms if aws kms, else AES256.
        """
        self._resp_server_encryption = self._response_headers.get(
            "x-amz-server-side-encryption", None)
        return self._resp_server_encryption

    def get_x_amz_server_side_encryption_aws_kms_key_id(self):
        """Get aws kms id for object.

        Returns
        -------
            [str]: SSEKMSKeyId for object.
        """
        self._resp_srvenc_aws_kms = self._response_headers.get(
            "x-amz-server-side-encryption-aws-kms-key-id", None)
        return self._resp_srvenc_aws_kms

    def get_x_amz_server_side_encryption_bucket_key_enabled(self):
        """Get status of bucket key encryption for object.

        Returns
        -------
            [bool]: True if bucket key enabled, else False.
        """
        self._resp_srvenc_bucketkey = self._response_headers.get(
            "x-amz-server-side-encryption-bucket-key-enabled", None)
        if self._resp_srvenc_bucketkey is not None:
            # Compare against the string "true" rather than using bool().
            self._resp_srvenc_bucketkey = \
                self._resp_srvenc_bucketkey.lower() == "true"
        return self._resp_srvenc_bucketkey

    def get_x_amz_server_side_encryption_customer_algorithm(self):
        """Get encryption algorithm for object.

        Returns
        -------
            [str]: SSECustomerAlgorithm - encryption algorithm for object.
        """
        self._resp_srvenc_cust_algo = self._response_headers.get(
            "x-amz-server-side-encryption-customer-algorithm", None)
        return self._resp_srvenc_cust_algo

    def get_x_amz_server_side_encryption_customer_key_MD5(self):
        """Get encryption key for object.

        Returns
        -------
            [str]: SSECustomerKeyMD5 of object.
        """
        self._resp_srvenc_cust_key = self._response_headers.get(
            "x-amz-server-side-encryption-customer-key-MD5", None)
        return self._resp_srvenc_cust_key

    def get_x_amz_storage_class(self):
        """Get storage class of object.

        Returns
        -------
            [str]: storage class value of object.
        """
        self._resp_storage_class = self._response_headers.get(
            "x-amz-storage-class", None)
        return self._resp_storage_class

    def get_x_amz_version_id(self):
        """Get version id of object.

        Returns
        -------
            [str]: version id of the object.
        """
        self._resp_version_id = self._response_headers.get(
            "x-amz-version-id", None)
        return self._resp_version_id

    def get_x_amz_website_redirect_location(self):
        """Get redirection website for object.

        Returns
        -------
            [str]: URL of redirect location.
        """
        self._resp_redirectlocation = self._response_headers.get(
            "x-amz-website-redirect-location", None)
        return self._resp_redirectlocation

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for HEAD Object operation."""
        return self._timer.elapsed_time_ms()

    async def get(self, part_number):
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)

        self._part_number = part_number

        query_params = urllib.parse.urlencode({
            'partNumber': self._part_number,
            'versionId': self._version_id
        })
        body = ""
        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'HEAD', request_uri, query_params, body)

        if (headers['Authorization'] is None):
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'HEAD on {}'.format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "HEAD Request Header {}".format(headers))

        self._timer.start()
        try:
            async with self._session.get_client_session().head(
                    self._session.endpoint + request_uri,
                    params=query_params,
                    headers=headers) as resp:

                if resp.status == 200:
                    # Keep aiohttp's case-insensitive headers so lookups
                    # like "ETag" work regardless of server casing.
                    self._response_headers = resp.headers
                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        'HEAD Object response received with' +
                        ' status code: {}'.format(resp.status))
                    self._logger.info('received response header {}'.format(
                        self._response_headers))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'HEAD Object failed with http status: {}'.format(
                            resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return

                self._state = S3RequestState.COMPLETED

        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        self._timer.stop()
        return

    def pause(self):
        self._state = S3RequestState.PAUSED
        # XXX Take real pause action

    def resume(self):
        self._state = S3RequestState.RUNNING
        # XXX Take real resume action

    def abort(self):
        self._state = S3RequestState.ABORTED
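
A sketch of a HEAD request for part 1 of an object, under the usual session assumption; the version id value is a placeholder:

async def inspect_object(session):
    head = S3AsyncHeadObject(session, "req-4", "my-bucket", "my-object",
                             version_id="version-id-placeholder")
    await head.get(part_number=1)
    if head.get_state() == S3RequestState.COMPLETED:
        print("size: {}, etag: {}, replication: {}".format(
            head.get_content_length(), head.get_etag(),
            head.get_x_amz_replication_status()))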
Code Example #9
class S3AsyncGetBucketReplication:
    def __init__(self, session, request_id, bucket_name):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger
        self._bucket_name = bucket_name
        self.remote_down = False
        self._http_status = None
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_execution_time(self):
        """Return total time for GET Object operation."""
        return self._timer.elapsed_time_ms()

    @staticmethod
    def prepare_matched_rule_object(rule):
        """Initialise the attributes from matched rules."""
        policy_obj = ReplicationRule()

        if 'DeleteMarkerReplication' in rule:
            if 'Status' in rule['DeleteMarkerReplication']:
                policy_obj._delete_marker_replication_status = \
                    rule['DeleteMarkerReplication']['Status']
        if 'Destination' in rule:
            if 'Bucket' in rule['Destination']:
                policy_obj._dest_bucket = rule['Destination']['Bucket'].split(
                    ':')[-1]
            if 'EncryptionConfiguration' in rule['Destination']:
                policy_obj._encryption_replication_key_id = \
                    rule['Destination'][
                        'EncryptionConfiguration']['ReplicaKmsKeyID']
            if 'Account' in rule['Destination']:
                policy_obj._account_id = rule['Destination']['Account']
            if 'ReplicationTime' in rule['Destination']:
                policy_obj._replication_time_status = \
                    rule['Destination']['ReplicationTime']['Status']
        if 'Status' in rule:
            policy_obj._status = rule['Status']
        if 'Filter' in rule:
            if 'Prefix' in rule['Filter']:
                policy_obj._prefix = rule['Filter']['Prefix']
            if 'Tag' in rule['Filter']:
                policy_obj._tag = rule['Filter']['Tag']
        if 'ID' in rule:
            policy_obj._id = rule['ID']
        if 'Priority' in rule:
            policy_obj._priority = rule['Priority']
        return policy_obj

    def get_replication_rule(self, obj_name):
        """Returns matched replication rule for given bucket.

        Args
        ----
            [str]: object name to check against all prefixes
            in replication rules.

        Returns
        -------
            ReplicationRule type object: Matched rule if any, else None.

        """
        self._dest_bucket = None
        try:
            for key, value in (
                    self._response_dict['ReplicationConfiguration']).items():
                if key == 'Rule':
                    # Check whether 'value' is a list of rules
                    if isinstance(value, list):
                        # Iterate through different rules
                        for rule in value:
                            # Check if object name matches any rule prefix
                            if rule['Filter']['Prefix'] in obj_name:
                                return S3AsyncGetBucketReplication.prepare_matched_rule_object(
                                    rule)
                    # If only one rule is present
                    else:
                        if value['Filter']['Prefix'] in obj_name:
                            return self.prepare_matched_rule_object(value)

        except Exception as e:
            self._logger.error(
                "Failed to get rule! Exception: {}".format(e))

    async def get(self):
        """Yields data chunk for given size."""
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name)
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "request_uri : {}".format(request_uri))
        # The 'replication' subresource selects GetBucketReplication; an
        # empty value yields 'replication=' rather than a stray 'None'.
        query_params = urllib.parse.urlencode({'replication': ''})
        body = ""

        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'GET', request_uri, query_params, body)

        if headers['Authorization'] is None:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        # Request url
        url = self._session.endpoint + request_uri

        self._logger.info(
            fmt_reqid_log(self._request_id) + 'GET on {}'.format(url))

        self._timer.start()

        try:
            async with self._session.get_client_session().get(
                    url, params=query_params, headers=headers) as resp:
                self._logger.debug(
                    fmt_reqid_log(self._request_id) +
                    "Received response {} for url {}".format(resp, resp.url))

                if resp.status == 200:
                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        "Received reponse [{} OK]".format(resp.status))

                    xml_resp = await resp.text()
                    self._response_dict = xmltodict.parse(xml_resp)

                    self._logger.debug('Response xml : {}\n'.format(
                        self._response_dict))

                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Error Response: {}'.format(error_msg))
        except Exception as e:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Error: Exception '{}' occured!".format(e))

        self._timer.stop()
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "execution time is : {}".format(self.get_execution_time()))

        return
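For context, a caller might drive this class as sketched below; `session` stands in for the project's session object carrying the endpoint, credentials and logger (an assumption, since its construction is not shown in this snippet):

import asyncio

async def find_rule(session, bucket, obj_name):
    # Fetch the bucket replication configuration, then look for a
    # rule whose Filter/Prefix matches the object name.
    getter = S3AsyncGetBucketReplication(session, "req-1", bucket)
    await getter.get()
    return getter.get_replication_rule(obj_name)

# rule = asyncio.run(find_rule(session, "src-bucket", "logs/app.log"))
# On a match, rule._dest_bucket names the replication target bucket.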
Code example #10
class S3AsyncUploadPart:
    def __init__(self, session, request_id, bucket_name, object_name,
                 upload_id):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name

        self._upload_id = upload_id

        self.remote_down = False
        self._http_status = None

        self._etag_dict = {}
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_response_header(self, header_key):
        """Returns response http header value."""
        if self._state == S3RequestState.COMPLETED:
            return self._response_headers[header_key]
        return None

    def get_execution_time(self):
        """Return total time for PUT Object operation."""
        return self._timer.elapsed_time_ms()

    def get_etag(self):
        """Returns ETag for object."""
        return self._response_headers["ETag"].strip("\"")

    def get_etag_dict(self):
        """Returns Etag dictionary."""
        return self._etag_dict

    # data_reader is an object with a fetch method that can yield data
    async def upload(self, data_reader, part_no, chunk_size):
        self._state = S3RequestState.RUNNING
        self._part_no = part_no

        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)

        print("Part Number : {}".format(self._part_no))
        query_params = urllib.parse.urlencode({
            'partNumber': self._part_no,
            'uploadId': self._upload_id
        })
        body = ""

        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'PUT', request_uri, query_params, body)

        if headers['Authorization'] is None:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        headers["Content-Length"] = str(chunk_size)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            "PUT on {}".format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "PUT with headers {}".format(headers))

        self._timer.start()
        try:
            async with self._session.get_client_session().put(
                    self._session.endpoint + request_uri,
                    headers=headers,
                    params=query_params,
                    data=data_reader.fetch(chunk_size)) as resp:
                self._timer.stop()

                self._http_status = resp.status
                self._response_headers = resp.headers

                self._logger.info(
                    fmt_reqid_log(self._request_id) +
                    'PUT Object completed with http status: {}'
                    '\n headers: {}'.format(resp.status,
                                            self._response_headers))

                if resp.status == 200:
                    # Record the part's ETag only on success; an error
                    # response may not carry one.
                    self._etag_dict[self._part_no] = \
                        self._response_headers["ETag"]
                    self._state = S3RequestState.COMPLETED
                else:
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Error Response: {}'.format(error_msg))
                    self._state = S3RequestState.FAILED
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self.remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        return
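The UploadPart request is addressed entirely through the partNumber and uploadId query parameters. A quick illustration of the query string the class signs and sends (values are illustrative; a real uploadId comes from CreateMultipartUpload):

import urllib.parse

print(urllib.parse.urlencode({'partNumber': 1, 'uploadId': 'abc123'}))
# -> partNumber=1&uploadId=abc123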
Code example #11
class MultipartObjectReplicator:
    def __init__(self, job, transfer_chunk_size_bytes, source_session,
                 target_session, part_count, part_length) -> None:
        """Initialise."""
        self._transfer_chunk_size_bytes = transfer_chunk_size_bytes
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._part_count = part_count
        self._part_length = part_length

        # A map of observers to watch for various notifications.
        # To start with: job completion (success/failure).
        self._observers = {}

        self._s3_source_session = source_session
        self._source_bucket = job.get_source_bucket_name()
        self._source_object = job.get_source_object_name()
        self._object_size = job.get_source_object_size()

        # Setup target site info
        self._s3_target_session = target_session
        self._target_bucket = job.get_target_bucket_name()

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        """Register an observer under the given label."""
        self._observers[label] = observer

    async def start(self):
        # Start transfer
        # Create multipart upload
        self._obj_create = S3AsyncCreateMultipartUpload(
            self._s3_target_session, self._request_id, self._target_bucket,
            self._source_object)
        await self._obj_create.create()

        # Get the upload id
        upload_id = self._obj_create.get_response_header("UploadId")

        # Upload part
        self._obj_upload = S3AsyncUploadPart(self._s3_target_session,
                                             self._request_id,
                                             self._target_bucket,
                                             self._source_object, upload_id)

        self._start_bytes = 0
        part_no = 0

        for part in range(len(self._part_length)):
            part_no += 1
            _logger.debug("Part Length : {}".format(self._part_length[part]))
            _logger.debug("Part Number : {}".format(part_no))

            self._object_source_reader = S3AsyncGetObject(
                self._s3_source_session, self._request_id, self._source_bucket,
                self._source_object, int(self._object_size), self._start_bytes,
                self._part_length[part])

            self._timer.start()
            await self._obj_upload.upload(self._object_source_reader, part_no,
                                          self._part_length[part])

            self._timer.stop()
            # Advance the source offset so the next part reads the
            # following byte range of the source object.
            self._start_bytes += self._part_length[part]

        # Get the ETag dict
        e_dict = self._obj_upload.get_etag_dict()

        # Complete multipart upload
        self._obj_complete = S3AsyncCompleteMultipartUpload(
            self._s3_target_session, self._request_id, self._target_bucket,
            self._source_object, upload_id, e_dict)

        await self._obj_complete.complete_upload()
        _logger.info("Final ETag : {}".format(
            self._obj_complete.get_final_etag()))
        _logger.info("Replication completed in {}ms for job_id {}".format(
            self._timer.elapsed_time_ms(), self._job_id))

        # notify job state events
        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            if self._obj_complete.get_state() == S3RequestState.PAUSED:
                await observer.notify(JobEvents.STOPPED, self._job_id)
            elif self._obj_complete.get_state() == S3RequestState.ABORTED:
                await observer.notify(JobEvents.ABORTED, self._job_id)
            else:
                await observer.notify(JobEvents.COMPLETED, self._job_id)

        # Validation of Source and Target object ETag should be done after
        # completion of replication.
        self._obj_reader = S3AsyncGetObject(self._s3_source_session,
                                            self._request_id,
                                            self._source_bucket,
                                            self._source_object,
                                            int(self._object_size), -1, -1)

        # Drain the reader so the GET completes and the response headers
        # (including the source object's ETag) are populated.
        reader_generator = self._obj_reader.fetch(
            self._transfer_chunk_size_bytes)
        async for _ in reader_generator:
            pass

        source_etag = self._obj_reader.get_etag()
        target_etag = self._obj_complete.get_final_etag()
        if source_etag == target_etag:
            _logger.info("ETag matched for job_id {}".format(self._job_id))
        else:
            _logger.error("ETag not matched for job_id {}".format(
                self._job_id))

    def pause(self):
        """Pause the running object tranfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object tranfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object tranfer."""
        self._object_writer.abort()
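The final ETag comparison in start() works because S3 derives a multipart object's ETag deterministically from the per-part MD5 digests. A sketch of that composite-ETag convention (standard S3 behaviour, not code from this project):

import hashlib

def composite_etag(parts):
    # AWS multipart ETag: MD5 of the concatenated per-part MD5
    # digests, suffixed with '-<part count>'.
    digests = b"".join(hashlib.md5(p).digest() for p in parts)
    return "{}-{}".format(hashlib.md5(digests).hexdigest(), len(parts))

# composite_etag([b"a" * 5242880, b"b" * 1024]) ends in '-2'

Note the comparison only holds when source and target objects are split at identical part boundaries, which is why the replicator reuses the source part lengths.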
Code example #12
class ObjectTagReplicator:
    def __init__(self, job, source_session,
                 target_session) -> None:
        """Initialise."""
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._tagset = job.get_object_tagset()
        self._s3_source_session = source_session

        self._source_bucket = job.get_source_bucket_name()
        self._source_object = job.get_source_object_name()

        # A map of observers to watch for various notifications.
        # To start with: job completion (success/failure).
        self._observers = {}

        # Setup target site info
        self._s3_target_session = target_session

        self._target_bucket = job.get_target_bucket_name()
        self._target_object = job.get_source_object_name()

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        """Register an observer under the given label."""
        self._observers[label] = observer

    async def start(self):
        # Start transfer
        object_source_tag_reader = S3AsyncGetObjectTagging(
            self._s3_source_session,
            self._request_id,
            self._source_bucket,
            self._source_object)

        self._timer.start()
        await object_source_tag_reader.fetch()
        self._timer.stop()
        _logger.info(
            "Tag read completed in {}ms for job_id {}".format(
                self._timer.elapsed_time_ms(), self._job_id))
        self._tags = object_source_tag_reader.get_tags_dict()

        object_tag_writer = S3AsyncPutObjectTagging(
            self._s3_target_session,
            self._request_id,
            self._target_bucket,
            self._target_object,
            self._tags)
        # Keep a reference so abort() can reach the writer.
        self._object_tag_writer = object_tag_writer

        self._timer.start()
        await object_tag_writer.send()
        self._timer.stop()
        _logger.info(
            "Replication of tag completed in {}ms for job_id {}".format(
                self._timer.elapsed_time_ms(), self._job_id))

        # notify job state events
        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            if object_tag_writer.get_state() == S3RequestState.PAUSED:
                await observer.notify(JobEvents.STOPPED, self._job_id)
            elif object_tag_writer.get_state() == S3RequestState.ABORTED:
                await observer.notify(JobEvents.ABORTED, self._job_id)
            else:
                await observer.notify(JobEvents.COMPLETED, self._job_id)

        if object_tag_writer.get_state() == S3RequestState.COMPLETED:
            # check object tags count of source and target objects
            # [user-defined metadata]
            object_target_tag_reader = S3AsyncGetObjectTagging(
                self._s3_target_session,
                self._request_id,
                self._target_bucket,
                self._target_object)

            await object_target_tag_reader.fetch()
            source_tags_count = object_source_tag_reader.get_tags_count()
            target_tags_count = object_target_tag_reader.get_tags_count()

            _logger.info(
                "Object tags count : Source {} and Target {}".format(
                    source_tags_count, target_tags_count))

            if source_tags_count == target_tags_count:
                _logger.info(
                    "Object tags count matched for job_id {}".format(
                        self._job_id))
            else:
                _logger.error(
                    "Object tags count not matched for job_id {}".format(
                        self._job_id))

    def pause(self):
        """Pause the running object tranfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object tranfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object tranfer."""
        self._object_tag_writer.abort()
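The tag counts compared above come from S3's GetObjectTagging XML. A small sketch of the shape xmltodict produces for such a response (the XML literal is illustrative):

import xmltodict

tagging_xml = ("<Tagging><TagSet>"
               "<Tag><Key>env</Key><Value>prod</Value></Tag>"
               "<Tag><Key>team</Key><Value>storage</Value></Tag>"
               "</TagSet></Tagging>")

tags = xmltodict.parse(tagging_xml)['Tagging']['TagSet']['Tag']
# A single <Tag> parses to a dict, several to a list, so a counter
# has to handle both shapes:
count = len(tags) if isinstance(tags, list) else 1
print(count)  # -> 2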
Code example #13
class S3AsyncPutObject:
    def __init__(self, session, request_id, bucket_name, object_name,
                 object_size):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger

        self._bucket_name = bucket_name
        self._object_name = object_name
        self._object_size = object_size

        self.remote_down = False
        self._http_status = None

        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_response_header(self, header_key):
        """Returns response http header value."""
        if self._state == S3RequestState.COMPLETED:
            return self._response_headers[header_key]
        return None

    def get_execution_time(self):
        """Return total time for PUT Object operation."""
        return self._timer.elapsed_time_ms()

    def get_etag(self):
        """Returns ETag for object."""
        return self._response_headers["ETag"].strip("\"")

    # data_reader is an object with a fetch method that can yield data
    async def send(self, data_reader, transfer_size):
        self._state = S3RequestState.RUNNING
        self._data_reader = data_reader

        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)

        query_params = ""
        body = ""

        headers = AWSV4Signer(self._session.endpoint,
                              self._session.service_name, self._session.region,
                              self._session.access_key,
                              self._session.secret_key).prepare_signed_header(
                                  'PUT', request_uri, query_params, body)

        if headers['Authorization'] is None:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        headers["Content-Length"] = str(self._object_size)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            "PUT on {}".format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "PUT with headers {}".format(headers))
        self._timer.start()
        try:
            async with self._session.get_client_session().put(
                    self._session.endpoint + request_uri,
                    headers=headers,
                    # Read all data from data_reader
                    data=data_reader.fetch(transfer_size)) as resp:
                self._timer.stop()

                if data_reader.get_state() != S3RequestState.ABORTED:
                    self._http_status = resp.status
                    self._response_headers = resp.headers

                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        'PUT Object completed with http status: {}'.format(
                            resp.status))

                    if resp.status == 200:
                        # Validate that the uploaded object's ETag matches
                        # the source; treat a mismatch as failure.
                        if self.get_etag() != data_reader.get_etag():
                            self._logger.error(
                                fmt_reqid_log(self._request_id) +
                                'Error Response: ETag mismatch.')
                            self._state = S3RequestState.FAILED
                        else:
                            self._state = S3RequestState.COMPLETED
                    else:
                        error_msg = await resp.text()
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            'Error Response: {}'.format(error_msg))
                        self._state = S3RequestState.FAILED
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self.remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) + "Failed to connect to S3: " +
                str(e))
        return

    def pause(self):
        self._state = S3RequestState.PAUSED
        # XXX Take real pause action

    def resume(self):
        self._state = S3RequestState.RUNNING
        # XXX Take real resume action

    def abort(self):
        self._state = S3RequestState.ABORTED
        # Abort the reader so that PUT can stop.
        self._data_reader.abort()
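A note on the streaming upload: aiohttp accepts an async generator as the request body, which is what lets send() stream chunks straight from data_reader.fetch() without buffering the whole object. A minimal standalone sketch (the URL is illustrative):

import asyncio
import aiohttp

async def chunks():
    # Stand-in for data_reader.fetch(); yields the body in pieces.
    for piece in (b"hello ", b"world"):
        yield piece

async def streamed_put(url):
    async with aiohttp.ClientSession() as session:
        async with session.put(url, data=chunks(),
                               headers={"Content-Length": "11"}) as resp:
            return resp.status

# asyncio.run(streamed_put("https://s3.example.com/bucket/key"))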