class S3AsyncGetObject:
    def __init__(self, session, request_id, bucket_name, object_name,
                 object_size, offset, length):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger
        self._bucket_name = bucket_name
        self._object_name = object_name
        self._object_size = object_size
        self._range_read_offset = offset
        self._range_read_length = length
        self.remote_down = False
        self._http_status = None
        self._object_range = None
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for GET Object operation."""
        return self._timer.elapsed_time_ms()

    def get_etag(self):
        """Returns ETag for object."""
        return self._response_headers["ETag"].strip("\"")

    def get_content_length(self):
        """Get content length."""
        return int(self._response_headers["Content-Length"])

    def get_total_object_range(self):
        """Get object range."""
        return self._object_range

    # Yields data chunks of the given size.
    async def fetch(self, chunk_size):
        request_uri = AWSV4Signer.fmt_s3_request_uri(
            self._bucket_name, self._object_name)
        query_params = ""
        body = ""

        # Check for range read request.
        if self._range_read_length >= 0:
            # GET object range read.
            start_bytes = self._range_read_offset
            end_bytes = self._range_read_offset + self._range_read_length
            object_range = "bytes=" + str(start_bytes) + "-" + str(end_bytes)
            total_to_fetch = (end_bytes - start_bytes) + 1
        else:
            # GET full object.
            object_range = None
            total_to_fetch = self._object_size

        headers = AWSV4Signer(
            self._session.endpoint,
            self._session.service_name,
            self._session.region,
            self._session.access_key,
            self._session.secret_key).prepare_signed_header(
            'GET', request_uri, query_params, body, object_range)

        if headers['Authorization'] is None:
            self._logger.error(fmt_reqid_log(self._request_id) +
                               "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(fmt_reqid_log(self._request_id) +
                          'GET on {}'.format(
                              self._session.endpoint + request_uri))
        self._logger.debug(fmt_reqid_log(self._request_id) +
                           "GET with headers {}".format(headers))
        self._timer.start()
        try:
            async with self._session.get_client_session().get(
                    self._session.endpoint + request_uri,
                    headers=headers) as resp:
                self._http_status = resp.status
                self._response_headers = resp.headers

                if object_range is None:
                    if resp.status == 200:
                        # GET object successful with 200 status code.
                        self._logger.info(
                            fmt_reqid_log(self._request_id) +
                            'GET Object completed with http status: '
                            '{}'.format(resp.status))
                    else:
                        self._state = S3RequestState.FAILED
                        error_msg = await resp.text()
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            'GET Object failed with http status: {}'.format(
                                resp.status) +
                            '\nError Response: {}'.format(error_msg))
                        return
                else:
                    if resp.status == 206:
                        # GET object range read successful with 206 status.
                        self._logger.info(
                            fmt_reqid_log(self._request_id) +
                            'GET object range read '
                            'completed with http status: {}'.format(
                                resp.status))
                    else:
                        self._state = S3RequestState.FAILED
                        error_msg = await resp.text()
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            'GET object range read failed '
                            'with http status: {}'.format(resp.status) +
                            ' Error Response: {}'.format(error_msg))
                        return

                self._state = S3RequestState.RUNNING
                while True:
                    # If abort requested, stop the loop and return.
                    if self._state == S3RequestState.ABORTED:
                        self._logger.debug(
                            fmt_reqid_log(self._request_id) +
                            "Aborted after reading {} bytes "
                            "for object size of {}".format(
                                self._object_size - total_to_fetch,
                                self._object_size))
                        break

                    data_chunk = await resp.content.read(chunk_size)
                    self._object_range = len(data_chunk)
                    if not data_chunk:
                        break
                    self._logger.debug(
                        fmt_reqid_log(self._request_id) +
                        "Received data_chunk of size {} bytes.".format(
                            len(data_chunk)))
                    yield data_chunk

                    total_to_fetch = total_to_fetch - len(data_chunk)
                    if total_to_fetch == 0:
                        # Completed reading all expected data.
                        self._state = S3RequestState.COMPLETED
                        break
                    elif total_to_fetch < 0:
                        self._state = S3RequestState.FAILED
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            "Received {} more bytes than "
                            "expected object size of {}".format(
                                total_to_fetch * -1, self._object_size))
                # end of while True

                if self._state != S3RequestState.ABORTED:
                    if total_to_fetch > 0:
                        self._state = S3RequestState.FAILED
                        self._logger.error(
                            fmt_reqid_log(self._request_id) +
                            "Received partial object. "
                            "Expected object size ({}), "
                            "Actual received size ({})".format(
                                self._object_size,
                                self._object_size - total_to_fetch))
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(fmt_reqid_log(self._request_id) +
                               "Failed to connect to S3: " + str(e))
        self._timer.stop()
        return

    def pause(self):
        self._state = S3RequestState.PAUSED
        # XXX Take real pause action

    def resume(self):
        self._state = S3RequestState.RUNNING
        # XXX Take real resume action

    def abort(self):
        self._state = S3RequestState.ABORTED
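
# Illustrative usage sketch (not part of the original module): one way a
# caller might drive S3AsyncGetObject.fetch() as an async generator and then
# inspect the request state. The `session` argument and the bucket/object
# names below are assumptions made only for this example.
async def _example_stream_full_object(session):
    # length = -1 selects a full-object GET; a value >= 0 selects a range read.
    reader = S3AsyncGetObject(session, "example-req-id",
                              "example-bucket", "example-object",
                              1024, 0, -1)
    received = 0
    async for chunk in reader.fetch(chunk_size=256):
        received += len(chunk)
    # State is COMPLETED when all expected bytes arrived, FAILED or ABORTED
    # otherwise.
    return reader.get_state(), received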
class ObjectReplicator:
    def __init__(self, job, transfer_chunk_size_bytes, range_read_offset,
                 range_read_length, source_session, target_session) -> None:
        """Initialise."""
        self._transfer_chunk_size_bytes = transfer_chunk_size_bytes
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._range_read_offset = range_read_offset
        self._range_read_length = range_read_length

        # A set of observers to watch for various notifications.
        # To start with job completed (success/failure).
        self._observers = {}

        self._s3_source_session = source_session
        self._object_source_reader = S3AsyncGetObject(
            self._s3_source_session,
            self._request_id,
            job.get_source_bucket_name(),
            job.get_source_object_name(),
            int(job.get_source_object_size()),
            self._range_read_offset,
            self._range_read_length)
        self._source_replication_status = S3AsyncUpdatereplicationStatus(
            self._s3_source_session,
            self._request_id,
            job.get_source_owner_account_id(),
            job.get_source_bucket_name(),
            job.get_source_object_name())

        # Setup target site info.
        self._s3_target_session = target_session
        self._object_writer = S3AsyncPutObject(
            self._s3_target_session,
            self._request_id,
            job.get_target_bucket_name(),
            job.get_source_object_name(),
            int(job.get_source_object_size()))
        self._object_target_reader = S3AsyncGetObject(
            self._s3_target_session,
            self._request_id,
            job.get_target_bucket_name(),
            job.get_source_object_name(),
            int(job.get_source_object_size()),
            self._range_read_offset,
            self._range_read_length)

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        self._observers[label] = observer

    async def start(self):
        # Start transfer.
        self._timer.start()
        await self._object_writer.send(self._object_source_reader,
                                       self._transfer_chunk_size_bytes)
        self._timer.stop()
        _logger.info("Replication completed in {}ms for job_id {}".format(
            self._timer.elapsed_time_ms(), self._job_id))

        # Notify job state events.
        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            if self._object_writer.get_state() == S3RequestState.PAUSED:
                await observer.notify(JobEvents.STOPPED, self._job_id)
            elif self._object_writer.get_state() == S3RequestState.ABORTED:
                await observer.notify(JobEvents.ABORTED, self._job_id)
            else:
                await observer.notify(JobEvents.COMPLETED, self._job_id)

        if self._object_writer.get_state() == S3RequestState.COMPLETED:
            await self._source_replication_status.update('COMPLETED')

            source_etag = self._object_source_reader.get_etag()
            target_etag = self._object_writer.get_etag()
            _logger.info("MD5 : Source {} and Target {}".format(
                source_etag, target_etag))
            # Check md5 of source and replicated objects at target.
            if source_etag == target_etag:
                _logger.info("MD5 matched for job_id {}".format(self._job_id))
            else:
                _logger.error("MD5 not matched for job_id {}".format(
                    self._job_id))

            # Check content length of source and target objects
            # [system-defined metadata].
            reader_generator = self._object_target_reader.fetch(
                self._transfer_chunk_size_bytes)
            async for _ in reader_generator:
                pass

            source_content_length = \
                self._object_source_reader.get_content_length()
            target_content_length = \
                self._object_target_reader.get_content_length()
            _logger.info("Content Length : Source {} and Target {}".format(
                source_content_length, target_content_length))
            if source_content_length == target_content_length:
                _logger.info("Content length matched for job_id {}".format(
                    self._job_id))
            else:
                _logger.error(
                    "Content length not matched for job_id {}".format(
                        self._job_id))

    def pause(self):
        """Pause the running object transfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object transfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object transfer."""
        self._object_writer.abort()
class S3AsyncGetObjectTagging: def __init__(self, session, request_id, bucket_name, object_name): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._bucket_name = bucket_name self._object_name = object_name self._remote_down = False self._http_status = None self._timer = Timer() self._state = S3RequestState.INITIALISED def get_state(self): """Returns current request state.""" return self._state def get_execution_time(self): """Return total time for GET operation.""" return self._timer.elapsed_time_ms() def get_tags_count(self): """Returns tags count.""" return len(self._response_tags_dict) def get_tags_dict(self): """Returns tags dictionary.""" return self._response_tags_dict def get_tags_value(self, key): """Returns the value for the given key.""" self._resp_tags_value = self._response_tags_dict.get(key, None) return self._resp_tags_value async def fetch(self): request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name, self._object_name) query_params = urllib.parse.urlencode({'tagging': None}) body = "" headers = AWSV4Signer(self._session.endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'GET', request_uri, query_params, body) if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) self._logger.info( fmt_reqid_log(self._request_id) + 'GET on {}'.format(self._session.endpoint + request_uri)) self._logger.debug( fmt_reqid_log(self._request_id) + "GET Request Header {}".format(headers)) self._timer.start() try: async with self._session.get_client_session().get( self._session.endpoint + request_uri, params=query_params, headers=headers) as resp: self._logger.info( fmt_reqid_log(self._request_id) + 'GET response received with' + ' status code: {}'.format(resp.status)) self._logger.info( 'Response url {}'.format(self._session.endpoint + request_uri)) if resp.status == 200: self._response_headers = resp.headers received_tagset = await resp.text() self._logger.info( "Received tagset {}".format(received_tagset)) # Remove namespace using regular expression # search and replace given pattern from the given string received_tagset = re.sub('xmlns="[^"]+"', '', received_tagset) # Parse XML response root = fromstring(received_tagset) tags_dict = {} # Find all Tags elements in the entire tree. for ele in root.findall(".//Tag"): key = ele.find('Key').text value = ele.find('Value').text tags_dict[key] = value self._response_tags_dict = tags_dict else: self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'GET failed with http status: {}'.format(resp.status) + 'Error Response: {}'.format(error_msg)) return except aiohttp.client_exceptions.ClientConnectorError as e: self._remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) self._timer.stop() return
class S3AsyncCompleteMultipartUpload: def __init__(self, session, request_id, bucket_name, object_name, upload_id, etag_dict): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._bucket_name = bucket_name self._object_name = object_name self._upload_id = upload_id self._etag_dict = etag_dict self._remote_down = False self._http_status = None self._timer = Timer() self._state = S3RequestState.INITIALISED def get_state(self): """Returns current request state.""" return self._state def get_response_header(self, header_key): """Returns response http header value.""" self._resp_header_key = self._response_headers.get(header_key, None) return self._resp_header_key def get_execution_time(self): """Return total time for GET operation.""" return self._timer.elapsed_time_ms() def get_final_etag(self): """Returns final etag after multipart completion.""" return self._final_etag async def complete_upload(self): self._state = S3RequestState.RUNNING request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name, self._object_name) query_params = urllib.parse.urlencode({'uploadId': self._upload_id}) body = "" # Prepare xml format etag_str = "<CompleteMultipartUpload>" for part, etag in self._etag_dict.items(): etag_str += "<Part><ETag>" + \ str(etag) + "</ETag><PartNumber>" + str(part) + "</PartNumber></Part>" etag_str += "</CompleteMultipartUpload>" headers = AWSV4Signer(self._session.endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'POST', request_uri, query_params, body) # check the header signature if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) self._logger.info( fmt_reqid_log(self._request_id) + 'POST on {}'.format(self._session.endpoint + request_uri)) self._logger.debug( fmt_reqid_log(self._request_id) + "POST Request Header {}".format(headers)) self._timer.start() try: async with self._session.get_client_session().post( self._session.endpoint + request_uri, data=etag_str, params=query_params, headers=headers) as resp: self._logger.info( fmt_reqid_log(self._request_id) + 'POST response received with' + ' status code: {}'.format(resp.status)) self._logger.info( 'Response url {}'.format(self._session.endpoint + request_uri)) if resp.status == 200: self._state = S3RequestState.COMPLETED # Get the response header and body self._response_headers = resp.headers self._logger.info('Response headers {}'.format( self._response_headers)) # Response body resp_body = await resp.text() # Remove the namespace from response body elements resp_body = re.sub('xmlns="[^"]+"', '', resp_body) xml_dict = fromstring(resp_body) # Get the ETag from response body self._final_etag = xml_dict.find('ETag').text else: # show the error messages self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'POST failed with http status: {}'.format(resp.status) + ' Error Response: {}'.format(error_msg)) return except aiohttp.client_exceptions.ClientConnectorError as e: self._remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) self._timer.stop() return
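
# For reference, a small self-contained sketch of the CompleteMultipartUpload
# request body that complete_upload() assembles from its part-number -> ETag
# dictionary. The part numbers and ETag values below are made-up examples.
def _example_complete_multipart_body():
    etag_dict = {1: "etag-part-1", 2: "etag-part-2"}
    body = "<CompleteMultipartUpload>"
    for part, etag in etag_dict.items():
        body += ("<Part><ETag>" + str(etag) + "</ETag>"
                 "<PartNumber>" + str(part) + "</PartNumber></Part>")
    body += "</CompleteMultipartUpload>"
    return body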
class ReplicationManager:
    def __init__(self, manager_endpoint):
        """Initialise ReplicationManager object."""
        # Id generated locally.
        self.id = str(uuid.uuid4())
        self.endpoint = manager_endpoint
        # Id returned by remote replication manager after subscribe.
        self.subscriber_id = None
        self.client_session = aiohttp.ClientSession()
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    async def close(self):
        await self.client_session.close()

    def get_dictionary(self):
        return {
            "id": self.id,
            "endpoint": self.endpoint,
            "subscriber_id": self.subscriber_id
        }

    async def subscribe(self, replicator_endpoint, prefetch_count):
        """Subscribe to remote replication manager for jobs.

        Args
        ----
        replicator_endpoint (str): url for replicator (current process).
        prefetch_count (int): maximum count of jobs to receive from
        replication manager.

        Returns
        -------
        bool: True when subscribed successfully, False when failed.

        """
        subscriber_payload = subscribe_payload_template()
        subscriber_payload.pop("id")  # Replication manager will generate.
        subscriber_payload["endpoint"] = replicator_endpoint
        subscriber_payload["prefetch_count"] = prefetch_count

        resource_url = url_with_resources(self.endpoint, ["subscribers"])
        req_id = str(uuid.uuid4())
        _logger.info(fmt_reqid_log(req_id) +
                     "POST on {}".format(resource_url))
        self._timer.start()
        try:
            self._state = S3RequestState.RUNNING
            async with self.client_session.post(
                    resource_url, json=subscriber_payload) as response:
                self._timer.stop()
                _logger.info(
                    fmt_reqid_log(req_id) +
                    'HTTP Response: Status: {}'.format(response.status))
                if response.status == 201:  # CREATED
                    # Subscribed successfully.
                    self._state = S3RequestState.COMPLETED
                    response_body = await response.json()
                    _logger.debug(
                        fmt_reqid_log(req_id) +
                        'HTTP Response: Body: {}'.format(response_body))
                    self.subscriber_id = response_body["id"]
                else:
                    # Failed to subscribe.
                    self._state = S3RequestState.FAILED
                    _logger.error(
                        fmt_reqid_log(req_id) + "Failed to Subscribe.")
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self._state = S3RequestState.FAILED
            _logger.error(
                fmt_reqid_log(req_id) +
                "Failed to connect to Replication Manager: " + str(e))

        if self._state == S3RequestState.COMPLETED:
            return True
        else:
            return False

    # Post the job status update to the replication manager.
    async def send_update(self, job_id, status):
        """Updates replication manager with job status.

        Args
        ----
        job_id (str): Job ID at the replication manager.
        status (str): completed/failed/aborted.

        Returns
        -------
        bool: True when status updated successfully, False when failed.

        """
        headers = {"Content-Type": "application/json"}
        payload = {"status": status}

        resource_url = url_with_resources(self.endpoint, ["jobs", job_id])
        req_id = str(uuid.uuid4())
        _logger.info(fmt_reqid_log(req_id) + 'PUT on {}'.format(resource_url))
        _logger.debug(
            fmt_reqid_log(req_id) + "PUT with headers {}".format(headers))
        _logger.debug(
            fmt_reqid_log(req_id) + "PUT content {}".format(payload))
        self._timer.start()
        try:
            self._state = S3RequestState.RUNNING
            async with self.client_session.put(resource_url,
                                               headers=headers,
                                               json=payload) as resp:
                self._timer.stop()
                self._response_headers = resp.headers
                self.http_status = resp.status
                self.response = await resp.json()
                _logger.info(
                    fmt_reqid_log(req_id) +
                    'PUT on {} returned http status: {}'.format(
                        resource_url, resp.status))
                if resp.status == 200:
                    self._state = S3RequestState.COMPLETED
                    _logger.info(
                        fmt_reqid_log(req_id) +
                        'PUT on {} returned Response: {}'.format(
                            resource_url, self.response))
                else:
                    self._state = S3RequestState.FAILED
                    _logger.error(
                        fmt_reqid_log(req_id) +
                        'PUT on {} returned Response: {}'.format(
                            resource_url, self.response))
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._timer.stop()
            self._state = S3RequestState.FAILED
            self.remote_down = True
            _logger.error('Failed to connect to Replication manager: ' +
                          str(e))

        if self._state == S3RequestState.COMPLETED:
            return True
        else:
            return False
class S3AsyncPutObjectTagging:
    def __init__(self, session, request_id, bucket_name, object_name,
                 obj_tag_set):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger
        self._bucket_name = bucket_name
        self._object_name = object_name
        self._tag_set = obj_tag_set
        self._remote_down = False
        self._http_status = None
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_state(self):
        """Returns current request state."""
        return self._state

    def get_execution_time(self):
        """Return total time for PUT Object tagging operation."""
        return self._timer.elapsed_time_ms()

    async def send(self):
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name,
                                                     self._object_name)
        query_params = urllib.parse.urlencode({'tagging': ''})
        body = ""

        # Prepare tag xml format.
        tag_str1 = "<Tagging><TagSet>"
        tag_str2 = "</TagSet></Tagging>"
        result = ""
        for key, val in self._tag_set.items():
            result += "<Tag><Key>" + key + "</Key><Value>" + val + \
                "</Value></Tag>"
        tagset = tag_str1 + result + tag_str2

        headers = AWSV4Signer(
            self._session.endpoint,
            self._session.service_name,
            self._session.region,
            self._session.access_key,
            self._session.secret_key).prepare_signed_header(
            'PUT', request_uri, query_params, body)

        if headers['Authorization'] is None:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        self._logger.info(
            fmt_reqid_log(self._request_id) +
            'PUT on {}'.format(self._session.endpoint + request_uri))
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "PUT Request Header {}".format(headers))
        self._timer.start()
        try:
            async with self._session.get_client_session().put(
                    self._session.endpoint + request_uri,
                    data=tagset,
                    params=query_params,
                    headers=headers) as resp:
                self._logger.info(
                    fmt_reqid_log(self._request_id) +
                    'PUT response received with' +
                    ' status code: {}'.format(resp.status))
                self._logger.info(
                    'Response url {}'.format(self._session.endpoint +
                                             request_uri))
                if resp.status == 200:
                    self._state = S3RequestState.COMPLETED
                    self._response_headers = resp.headers
                    self._logger.info('Response headers {}'.format(
                        self._response_headers))
                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'PUT failed with http status: {}'.format(
                            resp.status) +
                        ' Error Response: {}'.format(error_msg))
                    return
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self._remote_down = True
            self._state = S3RequestState.FAILED
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to connect to S3: " + str(e))
        self._timer.stop()
        return
class S3AsyncUpdatereplicationStatus: def __init__(self, session, request_id, account_id, bucket_name, object_name): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._account_id = account_id self._bucket_name = bucket_name self._object_name = object_name self._remote_down = False self._http_status = None self._timer = Timer() self._state = S3RequestState.INITIALISED self._bucket_metadata_index_id = "AAAAAAAAAHg=-AgAQAAAAAAA=" def get_state(self): """Returns current request state.""" return self._state def get_execution_time(self): """Return total time for HEAD Object operation.""" return self._timer.elapsed_time_ms() def kv_session(self, index, key, value=None): """Set up connection context for admin KV store API.""" canonical_uri = '/indexes/{}/{}'.format( urllib.parse.quote(index, safe=""), urllib.parse.quote(key)) request_uri = self._session.admin_endpoint + canonical_uri query_params = "" body = value or "" headers = AWSV4Signer(self._session.admin_endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'GET' if value is None else 'PUT', canonical_uri, query_params, body) if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) self._logger.info( fmt_reqid_log(self._request_id) + 'Motr index operation on {} {}'.format(request_uri, body)) if value is None: # Called without a new value, assumed to be an HTTP GET return self._session.get_client_session().get(URL(request_uri, encoded=True), params=query_params, headers=headers) else: # Going to PUT the new value return self._session.get_client_session().put(URL(request_uri, encoded=True), params=query_params, headers=headers, data=body.encode()) async def update(self, status): """Use KV store admin API to update x-amz-replication-status.""" self._timer.start() self._state = S3RequestState.RUNNING try: # After integration with service account, # this might become mandatory. Skip for now to # avoid breaking existing code. if self._session.admin_endpoint is None: self._logger.warn( fmt_reqid_log(self._request_id) + 'Admin API not configured, ' + 'skipping source metadata update') self._state = S3RequestState.COMPLETED return # Step 1. Get bucket metadata # This is needed to figure out the Motr index holding # object metadata for this bucket. async with self.kv_session( self._bucket_metadata_index_id, # The key in the bucket index is of the form # <account-id>/<bucket-name> self._account_id + '/' + self._bucket_name) as resp: if resp.status == 200: bucket_metadata = await resp.json(content_type=None) self._logger.info( fmt_reqid_log(self._request_id) + 'Bucket index lookup for {} response'.format( self._bucket_name) + ' received with status code: {}'.format(resp.status)) self._logger.debug( 'bucket metadata: {}'.format(bucket_metadata)) else: self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'Index operation failed with http status: {}'.format( resp.status) + ' Error Response: {}'.format(error_msg)) return # Magic part: the object list index layout seems to be a # base64 encoded memory dump of a C struct. We first decode, # then slice the high and low 64 bit integer values of the # Motr index ID we want. 
The server expects this 128 bit ID # as base64 encoded halves separated by a dash, like # AAAAAAAAAHg=-AgAQAAAAAAA= layout = base64.b64decode( bucket_metadata['motr_object_list_index_layout']) id_hi = base64.b64encode(layout[0:8]).decode() id_lo = base64.b64encode(layout[8:16]).decode() metadata_index = id_hi + '-' + id_lo # Step 2. GET object metadata async with self.kv_session(metadata_index, self._object_name) as resp: if resp.status == 200: object_metadata = await resp.json(content_type=None) self._logger.info( fmt_reqid_log(self._request_id) + 'Object index lookup for {} in {}'.format( self._object_name, metadata_index) + ' response received with' + ' status code: {}'.format(resp.status)) self._logger.debug( 'object metadata: {}'.format(object_metadata)) else: self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'Index operation failed with http status: {}'.format( resp.status) + ' Error Response: {}'.format(error_msg)) return # Step 3. Set replication status to the provided value object_metadata['x-amz-replication-status'] = status # Step 4. PUT updated object metadata async with self.kv_session(metadata_index, self._object_name, json.dumps(object_metadata)) as resp: if resp.status == 200: self._logger.info( fmt_reqid_log(self._request_id) + 'Set x-amz-replication-status for ' + '{} to {}, response received with'.format( self._object_name, status) + ' status code: {}'.format(resp.status)) self._logger.debug( 'updated object metadata: {}'.format(object_metadata)) else: self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'Index operation failed with http status: {}'.format( resp.status) + ' Error Response: {}'.format(error_msg)) return self._state = S3RequestState.COMPLETED except aiohttp.client_exceptions.ClientConnectorError as e: self._remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) finally: self._timer.stop() def pause(self): self._state = S3RequestState.PAUSED # XXX Take real pause action def resume(self): self._state = S3RequestState.PAUSED # XXX Take real resume action def abort(self): self._state = S3RequestState.ABORTED
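
# A self-contained sketch of the "magic part" above: the bucket metadata
# carries a base64-encoded Motr index layout, and the admin KV API expects the
# 128-bit index ID as two base64-encoded 64-bit halves joined by a dash. The
# default argument below is an arbitrary 16-byte example layout chosen so that
# the function returns the sample ID quoted in the comment above.
def _example_decode_motr_index_id(encoded_layout="AAAAAAAAAHgCABAAAAAAAA=="):
    import base64
    layout = base64.b64decode(encoded_layout)
    id_hi = base64.b64encode(layout[0:8]).decode()
    id_lo = base64.b64encode(layout[8:16]).decode()
    # e.g. "AAAAAAAAAHg=-AgAQAAAAAAA="
    return id_hi + '-' + id_lo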
class S3AsyncHeadObject: def __init__(self, session, request_id, bucket_name, object_name, version_id): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._bucket_name = bucket_name self._object_name = object_name self._version_id = version_id self.remote_down = False self._http_status = None self._timer = Timer() self._state = S3RequestState.INITIALISED def get_accept_ranges(self): """Get range of bytes for object. Returns ------- [str]: indicates that a range of bytes was specified for object. """ self._resp_accept_range = self._response_headers.get( "Accept-Ranges", None) return self._resp_accept_range def get_cache_control(self): """Get caching behavior for object. Returns ------- [str]: if set, returns cache policy and maximum age before expiring. """ self._resp_cache_control = self._response_headers.get( "Cache-Control", None) return self._resp_cache_control def get_content_disposition(self): """Get presentational information for object. Returns ------- [str]: attached filename/information for object. """ self._resp_content_disposition = self._response_headers.get( "Content-Disposition", None) return self._resp_content_disposition def get_content_encoding(self): """Get content encodings for object. Returns ------- [str]: specifies content encodings applied to object. """ self._resp_content_encoding = self._response_headers.get( "Content-Encoding", None) return self._resp_content_encoding def get_content_language(self): """Get content language for object. Returns ------- [str]: specify language the object content is in. """ self._resp_content_lang = self._response_headers.get( "Content-Language", None) return self._resp_content_lang def get_content_length(self): """Get content length of object. Returns ------- [int]: total content length of object. """ self._resp_content_length = self._response_headers.get( "Content-Length", None) if self._resp_content_length is not None: self._resp_content_length = int(self._resp_content_length) return self._resp_content_length def get_content_type(self): """Get content type for object. Returns ------- [str]: format of object data. """ self._resp_content_type = self._response_headers.get( "Content-Type", None) return self._resp_content_type def get_etag(self): """Get etag for object. Returns ------- [str]: opaque identifier. """ self._resp_etag = self._response_headers.get("Etag", None) return self._resp_etag def get_expires(self): """Get date and time for object. Returns ------- [str]: date and time at which the object is no longer cacheable. """ self._resp_expires = self._response_headers.get("Expires", None) return self._resp_expires def get_last_modified(self): """Get last creation date of object. Returns ------- [str]: date of the object. """ self._resp_last_modified = self._response_headers.get( "Last-Modified", None) return self._resp_last_modified def get_server_name(self): """Get server name. Returns ------- [str]: server name (SeagateS3 / AmazonS3). """ self._resp_server_name = self._response_headers.get("Server", None) return self._resp_server_name def get_x_amz_archive_status(self): """Get archive state of the object. Returns ------- [str]: archieve state (ARCHIVE_ACCESS / DEEP_ARCHIVE_ACCESS) """ self._resp_archive_status = self._response_headers.get( "x-amz-archive-status", None) return self._resp_archive_status def get_x_amz_delete_marker(self): """Get delete marker status for object. 
Returns ------- [bool]: True if object retrived was a Delete Marker, else False. """ self._resp_delete_marker = self._response_headers.get( "x-amz-delete-marker", None) if self._resp_delete_marker is not None: self._resp_delete_marker = bool(self._resp_delete_marker) return self._resp_delete_marker def get_x_amz_expiration(self): """Get expiration configuration of object. Returns ------- [str]: expiry date and rule-id, if enabled. """ self._resp_expiration = self._response_headers.get( "x-amz-expiration", None) return self._resp_expiration def get_x_amz_missing_meta(self): """Get missing metadata entries of object. Returns ------- [int]: value of the number of unprintable metadata entries. """ self._resp_missing_data = self._response_headers.get( "x-amz-missing-meta", None) if self._resp_missing_data is not None: self._resp_missing_data = int(self._resp_missing_data) return self._resp_missing_data def get_x_amz_mp_parts_count(self): """Get part counts of object. Returns ------- [int]: total part count of an object. """ self._resp_parts_count = self._response_headers.get( "x-amz-mp-parts-count", None) if self._resp_parts_count is not None: self._resp_parts_count = int(self._resp_parts_count) return self._resp_parts_count def get_x_amz_object_lock_legal_hold(self): """Get legal hold status value for the object. Returns ------- [str]: ON if a legal hold is in effect for the object, else OFF. """ self._resp_legal_hold = self._response_headers.get( "x-amz-object-lock-legal-hold", None) return self._resp_legal_hold def get_x_amz_object_lock_mode(self): """Get lock mode of object. Returns ------- [str]: Valid response values - GOVERNANCE / COMPLIANCE. """ self._resp_lock_mode = self._response_headers.get( "x-amz-object-lock-mode", None) return self._resp_lock_mode def get_x_amz_object_lock_retain_until_date(self): """Get date and time retention period expires of object. Returns ------- [str]: date and time when retention period expires. """ self._resp_lock_retention = self._response_headers.get( "x-amz-object-lock-retain-until-date", None) return self._resp_lock_retention def get_x_amz_replication_status(self): """Get replication status of object. Returns ------- [str]: valid response values - PENDING, COMPLETED or FAILED indicating object replication status. """ self._resp_replication_status = self._response_headers.get( "x-amz-replication-status", None) return self._resp_replication_status def get_x_amz_request_charged(self): """Get requester value of object. Returns ------- [str]: Requester of an object. """ self._resp_charged = self._response_headers.get( "x-amz-request-charged", None) return self._resp_charged def get_x_amz_request_id(self): """Get request id of object. Returns ------- [str]: specific request id. """ self._resp_id = self._response_headers.get("x-amz-request-id", None) return self._resp_id def get_x_amz_restore(self): """Get the date when the restored copy expires. Returns ------- [str]: ongoing-request and expiry-date of archived object. """ self._resp_restore = self._response_headers.get("x-amz-restore", None) return self._resp_restore def get_x_amz_server_side_encryption(self): """Get aws kms or encryption key for object. Returns ------- [str]: aws:kms if aws kms, else AES256. """ self._resp_server_encryption = self._response_headers.get( "x-amz-server-side-encryption", None) return self._resp_server_encryption def get_x_amz_server_side_encryption_aws_kms_key_id(self): """Get aws kms id for object. Returns ------- [str]: SSEKMSKeyId for object. 
""" self._resp_srvenc_aws_kms = self._response_headers.get( "x-amz-server-side-encryption-aws-kms-key-id", None) return self._resp_srvenc_aws_kms def get_x_amz_server_side_encryption_bucket_key_enabled(self): """Get status of bucket key encryption for object. Returns ------- [bool]: True if bucket key enabled, else False. """ self._resp_srvenc_bucketkey = self._response_headers.get( "x-amz-server-side-encryption-bucket-key-enabled", None) if self._resp_srvenc_bucketkey is not None: self._resp_srvenc_bucketkey = bool(self._resp_srvenc_bucketkey) return self._resp_srvenc_bucketkey def get_x_amz_server_side_encryption_customer_algorithm(self): """Get encryption algorithm for object. Returns ------- [str]: SSECustomerAlgorithm - encryption algorithm for object. """ self._resp_srvenc_cust_algo = self._response_headers.get( "x-amz-server-side-encryption-customer-algorithm", None) return self._resp_srvenc_cust_algo def get_x_amz_server_side_encryption_customer_key_MD5(self): """Get encryption key for object. Returns ------- [str]: SSECustomerKeyMD5 of object. """ self._resp_srvenc_cust_key = self._response_headers.get( "x-amz-server-side-encryption-customer-key-MD5", None) return self._resp_srvenc_cust_key def get_x_amz_storage_class(self): """Get storage class of object. Returns ------- [str]: storage class value of object. """ self._resp_storage_class = self._response_headers.get( "x-amz-storage-class", None) return self._resp_storage_class def get_x_amz_version_id(self): """Get version id of object. Returns ------- [str]: version id an object. """ self._resp_version_id = self._response_headers.get( "x-amz-version-id", None) return self._resp_version_id def get_x_amz_website_redirect_location(self): """Get redirection website for object. Returns ------- [str]: URL of redirect location. 
""" self._resp_redirectlocation = self._response_headers.get( "x-amz-website-redirect-location", None) return self._resp_redirectlocation def get_state(self): """Returns current request state.""" return self._state def get_execution_time(self): """Return total time for HEAD Object operation.""" return self._timer.elapsed_time_ms() async def get(self, part_number): request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name, self._object_name) self._part_number = part_number query_params = urllib.parse.urlencode({ 'partNumber': self._part_number, 'versionId': self._version_id }) body = "" headers = AWSV4Signer(self._session.endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'HEAD', request_uri, query_params, body) if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) self._logger.info( fmt_reqid_log(self._request_id) + 'HEAD on {}'.format(self._session.endpoint + request_uri)) self._logger.debug( fmt_reqid_log(self._request_id) + "HEAD Request Header {}".format(headers)) self._timer.start() try: async with self._session.get_client_session().head( self._session.endpoint + request_uri, params=query_params, headers=headers) as resp: if resp.status == 200: self._response_headers = dict(resp.headers) self._logger.info( fmt_reqid_log(self._request_id) + 'HEAD Object response received with' + ' status code: {}'.format(resp.status)) self._logger.info('received reponse header {}'.format( self._response_headers)) else: self._state = S3RequestState.FAILED error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'HEAD Object failed with http status: {}'.format( resp.status) + ' Error Response: {}'.format(error_msg)) return self._state = S3RequestState.RUNNING except aiohttp.client_exceptions.ClientConnectorError as e: self.remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) self._timer.stop() return def pause(self): self._state = S3RequestState.PAUSED # XXX Take real pause action def resume(self): self._state = S3RequestState.PAUSED # XXX Take real resume action def abort(self): self._state = S3RequestState.ABORTED
class S3AsyncGetBucketReplication:
    def __init__(self, session, request_id, bucket_name):
        """Initialise."""
        self._session = session
        # Request id for better logging.
        self._request_id = request_id
        self._logger = session.logger
        self._bucket_name = bucket_name
        self.remote_down = False
        self._http_status = None
        self._timer = Timer()
        self._state = S3RequestState.INITIALISED

    def get_execution_time(self):
        """Return total time for GET bucket replication operation."""
        return self._timer.elapsed_time_ms()

    @staticmethod
    def prepare_matched_rule_object(rule):
        """Initialise the attributes from the matched rule."""
        policy_obj = ReplicationRule()
        if 'DeleteMarkerReplication' in rule:
            if 'Status' in rule['DeleteMarkerReplication']:
                policy_obj._delete_marker_replication_status = \
                    rule['DeleteMarkerReplication']['Status']
        if 'Destination' in rule:
            if 'Bucket' in rule['Destination']:
                policy_obj._dest_bucket = \
                    rule['Destination']['Bucket'].split(':')[-1]
            if 'EncryptionConfiguration' in rule['Destination']:
                policy_obj._encryption_replication_key_id = \
                    rule['Destination'][
                        'EncryptionConfiguration']['ReplicaKmsKeyID']
            if 'Account' in rule['Destination']:
                policy_obj._account_id = rule['Destination']['Account']
            if 'ReplicationTime' in rule['Destination']:
                policy_obj._replication_time_status = \
                    rule['Destination']['ReplicationTime']['Status']
        if 'Status' in rule:
            policy_obj._status = rule['Status']
        if 'Filter' in rule.keys():
            if 'Prefix' in rule['Filter'].keys():
                policy_obj._prefix = rule['Filter']['Prefix']
            if 'Tag' in rule['Filter'].keys():
                policy_obj._tag = rule['Filter']['Tag']
        if 'ID' in rule.keys():
            policy_obj._id = rule['ID']
        if 'Priority' in rule:
            policy_obj._priority = rule['Priority']
        return policy_obj

    def get_replication_rule(self, obj_name):
        """Return matched replication rule for the given bucket.

        Args
        ----
        obj_name (str): object name to check against all prefixes in
        replication rules.

        Returns
        -------
        ReplicationRule type object: Matched rule if any, else None.

        """
        self._dest_bucket = None
        try:
            for key, value in (
                    self._response_dict[
                        'ReplicationConfiguration']).items():
                if key == 'Rule':
                    # Check whether 'value' is a list of rules.
                    if isinstance(value, list):
                        # Iterate through the rules.
                        for rule in value:
                            # Check if object name matches any rule prefix.
                            if rule['Filter']['Prefix'] in obj_name:
                                return S3AsyncGetBucketReplication.\
                                    prepare_matched_rule_object(rule)
                    # If only one rule is present.
                    else:
                        if value['Filter']['Prefix'] in obj_name:
                            return self.prepare_matched_rule_object(value)
        except Exception as e:
            self._logger.error(
                "Failed to get rule! Exception type : {}".format(e))

    async def get(self):
        """Fetch the bucket replication configuration."""
        request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name)
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "request_uri : {}".format(request_uri))
        query_params = urllib.parse.urlencode({'replication': None})
        body = ""

        headers = AWSV4Signer(
            self._session.endpoint,
            self._session.service_name,
            self._session.region,
            self._session.access_key,
            self._session.secret_key).prepare_signed_header(
            'GET', request_uri, query_params, body)

        if headers['Authorization'] is None:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Failed to generate v4 signature")
            sys.exit(-1)

        # Request url.
        url = self._session.endpoint + request_uri
        self._logger.info(
            fmt_reqid_log(self._request_id) + 'GET on {}'.format(url))
        self._timer.start()
        try:
            async with self._session.get_client_session().get(
                    url, params=query_params, headers=headers) as resp:
                self._logger.debug(
                    fmt_reqid_log(self._request_id) +
                    "Response url {}".format(resp.url))
                self._logger.debug(
                    fmt_reqid_log(self._request_id) +
                    "Received response {}".format(resp))
                if resp.status == 200:
                    self._logger.info(
                        fmt_reqid_log(self._request_id) +
                        "Received response [{} OK]".format(resp.status))
                    xml_resp = await resp.text()
                    self._response_dict = xmltodict.parse(xml_resp)
                    self._logger.debug('Response xml : {}\n'.format(
                        self._response_dict))
                else:
                    self._state = S3RequestState.FAILED
                    error_msg = await resp.text()
                    self._logger.error(
                        fmt_reqid_log(self._request_id) +
                        'Error Response: {}'.format(error_msg))
        except Exception as e:
            self._logger.error(
                fmt_reqid_log(self._request_id) +
                "Error: Exception '{}' occurred!".format(e))
        self._timer.stop()
        self._logger.debug(
            fmt_reqid_log(self._request_id) +
            "execution time is : {}".format(self.get_execution_time()))
        return
class S3AsyncUploadPart: def __init__(self, session, request_id, bucket_name, object_name, upload_id): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._bucket_name = bucket_name self._object_name = object_name self._upload_id = upload_id self.remote_down = False self._http_status = None self._etag_dict = {} self._timer = Timer() self._state = S3RequestState.INITIALISED def get_state(self): """Returns current request state.""" return self._state def get_response_header(self, header_key): """Returns response http header value.""" if self._state == S3RequestState.COMPLETED: return self._response_headers[header_key] return None def get_execution_time(self): """Return total time for PUT Object operation.""" return self._timer.elapsed_time_ms() def get_etag(self): """Returns ETag for object.""" return self._response_headers["ETag"].strip("\"") def get_etag_dict(self): """Returns Etag dictionary.""" return self._etag_dict # data_reader is object with fetch method that can yield data async def upload(self, data_reader, part_no, chunk_size): self._state = S3RequestState.RUNNING self._part_no = part_no request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name, self._object_name) print("Part Number : {}".format(self._part_no)) query_params = urllib.parse.urlencode({ 'partNumber': self._part_no, 'uploadId': self._upload_id }) body = "" headers = AWSV4Signer(self._session.endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'PUT', request_uri, query_params, body) if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) headers["Content-Length"] = str(chunk_size) self._logger.info( fmt_reqid_log(self._request_id) + "PUT on {}".format(self._session.endpoint + request_uri)) self._logger.debug( fmt_reqid_log(self._request_id) + "PUT with headers {}".format(headers)) self._timer.start() try: async with self._session.get_client_session().put( self._session.endpoint + request_uri, headers=headers, params=query_params, data=data_reader.fetch(chunk_size)) as resp: self._timer.stop() self._http_status = resp.status self._response_headers = resp.headers self._logger.info( fmt_reqid_log(self._request_id) + 'PUT Object completed with http status: {}' '\n header{}'.format(resp.status, self._response_headers)) self._etag_dict[self._part_no] = self._response_headers["Etag"] if resp.status == 200: self._state = S3RequestState.COMPLETED else: error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'Error Response: {}'.format(error_msg)) self._state = S3RequestState.FAILED except aiohttp.client_exceptions.ClientConnectorError as e: self._timer.stop() self.remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) return
class MultipartObjectReplicator:
    def __init__(self, job, transfer_chunk_size_bytes, source_session,
                 target_session, part_count, part_length) -> None:
        """Initialise."""
        self._transfer_chunk_size_bytes = transfer_chunk_size_bytes
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._part_count = part_count
        self._part_length = part_length

        # A set of observers to watch for various notifications.
        # To start with job completed (success/failure).
        self._observers = {}

        self._s3_source_session = source_session
        self._source_bucket = job.get_source_bucket_name()
        self._source_object = job.get_source_object_name()
        self._object_size = job.get_source_object_size()

        # Setup target site info.
        self._s3_target_session = target_session
        self._target_bucket = job.get_target_bucket_name()

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        self._observers[label] = observer

    async def start(self):
        # Start transfer.

        # Create multipart upload.
        self._obj_create = S3AsyncCreateMultipartUpload(
            self._s3_target_session, self._request_id, self._target_bucket,
            self._source_object)
        await self._obj_create.create()

        # Get the upload id.
        upload_id = self._obj_create.get_response_header("UploadId")

        # Upload parts.
        self._obj_upload = S3AsyncUploadPart(self._s3_target_session,
                                             self._request_id,
                                             self._target_bucket,
                                             self._source_object,
                                             upload_id)
        self._start_bytes = 0
        part_no = 0
        for part in range(len(self._part_length)):
            part_no += 1
            _logger.debug("Part Length : {}".format(self._part_length[part]))
            _logger.debug("Part Number : {}".format(part_no))
            self._object_source_reader = S3AsyncGetObject(
                self._s3_source_session, self._request_id,
                self._source_bucket, self._source_object,
                int(self._object_size), self._start_bytes,
                self._part_length[part])
            self._timer.start()
            await self._obj_upload.upload(self._object_source_reader,
                                          part_no, self._part_length[part])
            self._timer.stop()
            self._start_bytes = 0

        # Get the ETag dict.
        e_dict = self._obj_upload.get_etag_dict()

        # Complete multipart upload.
        self._obj_complete = S3AsyncCompleteMultipartUpload(
            self._s3_target_session, self._request_id, self._target_bucket,
            self._source_object, upload_id, e_dict)
        await self._obj_complete.complete_upload()
        _logger.info("Final ETag : {}".format(
            self._obj_complete.get_final_etag()))
        _logger.info("Replication completed in {}ms for job_id {}".format(
            self._timer.elapsed_time_ms(), self._job_id))

        # Notify job state events.
        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            if self._obj_complete.get_state() == S3RequestState.PAUSED:
                await observer.notify(JobEvents.STOPPED, self._job_id)
            elif self._obj_complete.get_state() == S3RequestState.ABORTED:
                await observer.notify(JobEvents.ABORTED, self._job_id)
            else:
                await observer.notify(JobEvents.COMPLETED, self._job_id)

        # Validation of Source and Target object ETag should be done after
        # completion of replication.
        self._obj_reader = S3AsyncGetObject(self._s3_source_session,
                                            self._request_id,
                                            self._source_bucket,
                                            self._source_object,
                                            int(self._object_size), -1, -1)
        reader_generator = self._obj_reader.fetch(
            self._transfer_chunk_size_bytes)
        async for _ in reader_generator:
            pass

        source_etag = self._obj_reader.get_etag()
        target_etag = self._obj_complete.get_final_etag()
        if source_etag == target_etag:
            _logger.info("ETag matched for job_id {}".format(self._job_id))
        else:
            _logger.error("ETag not matched for job_id {}".format(
                self._job_id))

    def pause(self):
        """Pause the running object transfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object transfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object transfer."""
        # Abort the in-flight source reader; this stops the part upload
        # stream that feeds S3AsyncUploadPart.
        self._object_source_reader.abort()
class ObjectTagReplicator:
    def __init__(self, job, source_session, target_session) -> None:
        """Initialise."""
        self._job_id = job.get_job_id()
        self._request_id = self._job_id
        self._timer = Timer()
        self._tagset = job.get_object_tagset()
        self._s3_source_session = source_session
        self._source_bucket = job.get_source_bucket_name()
        self._source_object = job.get_source_object_name()

        # A set of observers to watch for various notifications.
        # To start with job completed (success/failure).
        self._observers = {}

        # Setup target site info.
        self._s3_target_session = target_session
        self._target_bucket = job.get_target_bucket_name()
        self._target_object = job.get_source_object_name()

    def get_execution_time(self):
        """Return total time for Object replication."""
        return self._timer.elapsed_time_ms()

    def setup_observers(self, label, observer):
        self._observers[label] = observer

    async def start(self):
        # Start transfer.
        object_source_tag_reader = S3AsyncGetObjectTagging(
            self._s3_source_session, self._request_id, self._source_bucket,
            self._source_object)
        self._timer.start()
        await object_source_tag_reader.fetch()
        self._timer.stop()
        _logger.info(
            "Tag read completed in {}ms for job_id {}".format(
                self._timer.elapsed_time_ms(), self._job_id))

        self._tags = object_source_tag_reader.get_tags_dict()

        self._object_tag_writer = S3AsyncPutObjectTagging(
            self._s3_target_session, self._request_id, self._target_bucket,
            self._target_object, self._tags)
        self._timer.start()
        await self._object_tag_writer.send()
        _logger.info(
            "Replication of tag completed in {}ms for job_id {}".format(
                self._timer.elapsed_time_ms(), self._job_id))

        # Notify job state events.
        for label, observer in self._observers.items():
            _logger.debug(
                "Notify completion to observer with label[{}]".format(label))
            if self._object_tag_writer.get_state() == \
                    S3RequestState.PAUSED:
                await observer.notify(JobEvents.STOPPED, self._job_id)
            elif self._object_tag_writer.get_state() == \
                    S3RequestState.ABORTED:
                await observer.notify(JobEvents.ABORTED, self._job_id)
            else:
                await observer.notify(JobEvents.COMPLETED, self._job_id)

        if self._object_tag_writer.get_state() == S3RequestState.COMPLETED:
            # Check object tags count of source and target objects
            # [user-defined metadata].
            object_target_tag_reader = S3AsyncGetObjectTagging(
                self._s3_target_session, self._request_id,
                self._target_bucket, self._target_object)
            await object_target_tag_reader.fetch()
            source_tags_count = object_source_tag_reader.get_tags_count()
            target_tags_count = object_target_tag_reader.get_tags_count()
            _logger.info(
                "Object tags count : Source {} and Target {}".format(
                    source_tags_count, target_tags_count))
            if source_tags_count == target_tags_count:
                _logger.info(
                    "Object tags count matched for job_id {}".format(
                        self._job_id))
            else:
                _logger.error(
                    "Object tags count not matched for job_id {}".format(
                        self._job_id))

    def pause(self):
        """Pause the running object transfer."""
        pass  # XXX

    def resume(self):
        """Resume the running object transfer."""
        pass  # XXX

    def abort(self):
        """Abort the running object transfer."""
        self._object_tag_writer.abort()
class S3AsyncPutObject: def __init__(self, session, request_id, bucket_name, object_name, object_size): """Initialise.""" self._session = session # Request id for better logging. self._request_id = request_id self._logger = session.logger self._bucket_name = bucket_name self._object_name = object_name self._object_size = object_size self.remote_down = False self._http_status = None self._timer = Timer() self._state = S3RequestState.INITIALISED def get_state(self): """Returns current request state.""" return self._state def get_response_header(self, header_key): """Returns response http header value.""" if self._state == S3RequestState.COMPLETED: return self._response_headers[header_key] return None def get_execution_time(self): """Return total time for PUT Object operation.""" return self._timer.elapsed_time_ms() def get_etag(self): """Returns ETag for object.""" return self._response_headers["ETag"].strip("\"") # data_reader is object with fetch method that can yeild data async def send(self, data_reader, transfer_size): self._state = S3RequestState.RUNNING self._data_reader = data_reader request_uri = AWSV4Signer.fmt_s3_request_uri(self._bucket_name, self._object_name) query_params = "" body = "" headers = AWSV4Signer(self._session.endpoint, self._session.service_name, self._session.region, self._session.access_key, self._session.secret_key).prepare_signed_header( 'PUT', request_uri, query_params, body) if (headers['Authorization'] is None): self._logger.error( fmt_reqid_log(self._request_id) + "Failed to generate v4 signature") sys.exit(-1) headers["Content-Length"] = str(self._object_size) self._logger.info( fmt_reqid_log(self._request_id) + "PUT on {}".format(self._session.endpoint + request_uri)) self._logger.debug( fmt_reqid_log(self._request_id) + "PUT with headers {}".format(headers)) self._timer.start() try: async with self._session.get_client_session().put( self._session.endpoint + request_uri, headers=headers, # Read all data from data_reader data=data_reader.fetch(transfer_size)) as resp: self._timer.stop() if data_reader.get_state() != S3RequestState.ABORTED: self._http_status = resp.status self._response_headers = resp.headers self._logger.info( fmt_reqid_log(self._request_id) + 'PUT Object completed with http status: {}'.format( resp.status)) # Validate if upload object etag matches. if self.get_etag() != data_reader.get_etag(): self._state = S3RequestState.FAILED error_msg = "ETag mismatch." self._logger.error( fmt_reqid_log(self._request_id) + 'Error Response: {}'.format(error_msg)) if resp.status == 200: self._state = S3RequestState.COMPLETED else: error_msg = await resp.text() self._logger.error( fmt_reqid_log(self._request_id) + 'Error Response: {}'.format(error_msg)) self._state = S3RequestState.FAILED except aiohttp.client_exceptions.ClientConnectorError as e: self._timer.stop() self.remote_down = True self._state = S3RequestState.FAILED self._logger.error( fmt_reqid_log(self._request_id) + "Failed to connect to S3: " + str(e)) return def pause(self): self._state = S3RequestState.PAUSED # XXX Take real pause action def resume(self): self._state = S3RequestState.PAUSED # XXX Take real resume action def abort(self): self._state = S3RequestState.ABORTED # Abort the reader so that PUT can stop. self._data_reader.abort()
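
# Illustrative sketch (not part of the original module): how a replicator can
# wire a source S3AsyncGetObject into S3AsyncPutObject.send(), which streams
# reader.fetch() chunks as the PUT request body. The session objects, bucket
# and object names, and sizes below are placeholders for the example.
async def _example_stream_copy(source_session, target_session):
    reader = S3AsyncGetObject(source_session, "example-req-id",
                              "source-bucket", "example-object",
                              4096, 0, -1)
    writer = S3AsyncPutObject(target_session, "example-req-id",
                              "target-bucket", "example-object", 4096)
    await writer.send(reader, transfer_size=1024)
    return writer.get_state()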