def test_decrypt_with_string_token(self):
    # The token is handed to decrypt() as text (str) rather than bytes;
    # the round trip must still yield the original message.
    plaintext = "Test string"
    key = base64.b64encode(b"x" * 32)
    encrypted = PrivatePublisher.encrypt(plaintext.encode("utf-8"), secret=key)
    token_text = encrypted.decode("utf-8")
    decrypted = PrivatePublisher.decrypt(token_text, secret=key)
    self.assertEqual(plaintext, decrypted)
def test_roundtrip_bytes_message(self):
    # Encrypt a bytes payload, decrypt the resulting token with the same
    # secret, and expect the original text back.
    plaintext = "Test string"
    key = base64.b64encode(b"x" * 32)
    payload = plaintext.encode("utf-8")
    encrypted = PrivatePublisher.encrypt(payload, secret=key)
    decrypted = PrivatePublisher.decrypt(encrypted, secret=key)
    self.assertEqual(plaintext, decrypted)
def __init__(self, logger=logger):
    """Store the logger, clear ``tx_id`` and create the three publishers.

    :param logger: logger to use; defaults to the module-level ``logger``.
    """
    self.logger = logger
    self.tx_id = None

    # All three publishers share the same broker URLs and differ only in
    # the queue they target.
    rabbit_urls = settings.RABBIT_URLS
    self.rrm_publisher = PrivatePublisher(
        rabbit_urls,
        settings.RABBIT_RRM_RECEIPT_QUEUE,
    )
    self.notifications = QueuePublisher(
        rabbit_urls,
        settings.RABBIT_SURVEY_QUEUE,
    )
    self.dap = QueuePublisher(
        rabbit_urls,
        settings.RABBIT_DAP_QUEUE,
    )
def test_encrypt_string_message(self):
    # encrypt() given a str must return bytes that are ASCII-decodable
    # and valid URL-safe base64.
    key = base64.b64encode(b"x" * 32)
    token = PrivatePublisher.encrypt("Test string", secret=key)
    self.assertIsInstance(token, bytes)

    ascii_token = token.decode("ascii")
    self.assertIsInstance(ascii_token, str)

    raw = base64.urlsafe_b64decode(ascii_token)
    self.assertIsInstance(raw, bytes)
class ResponseProcessor:
    """Drive one survey submission through the SDX processing steps.

    ``process`` optionally decrypts the submission, validates and stores it
    through HTTP calls to the services configured in ``settings``, and then
    publishes a receipt, a downstream notification and a DAP message
    whenever the corresponding ``_requires_*`` predicate allows it.
    """

    @staticmethod
    def options():
        """Return options read from the environment.

        :returns: a dict that contains ``"secret"`` (the ASCII-encoded value
            of the ``SDX_COLLECT_SECRET`` environment variable) only when
            that variable is set; otherwise an empty dict.
        """
        rv = {}
        secret = os.getenv("SDX_COLLECT_SECRET")
        if secret is not None:
            rv["secret"] = secret.encode("ascii")
        return rv

    def __init__(self, logger=logger):
        """Store the logger, clear ``tx_id`` and create the three publishers.

        :param logger: logger to use; defaults to the module-level ``logger``.
        """
        self.logger = logger
        self.tx_id = None
        self.rrm_publisher = PrivatePublisher(
            settings.RABBIT_URLS, settings.RABBIT_RRM_RECEIPT_QUEUE)
        self.notifications = QueuePublisher(
            settings.RABBIT_URLS, settings.RABBIT_SURVEY_QUEUE)
        self.dap = QueuePublisher(
            settings.RABBIT_URLS, settings.RABBIT_DAP_QUEUE)

    def service_name(self, url=None):
        """Map a request URL to a service label used in log output.

        :param url: URL string; its ``/``-separated segments are inspected.
        :returns: ``'SDX-STORE'``, ``'SDX-DECRYPT'`` or ``'SDX-VALIDATE'``
            when the URL contains a ``responses``, ``decrypt`` or
            ``validate`` segment respectively (checked in that order);
            ``None`` when nothing matches or ``url`` has no ``split``
            method (the latter is logged).
        """
        try:
            parts = url.split('/')
        except AttributeError:
            self.logger.exception("No valid service name")
            return None

        if 'responses' in parts:
            return 'SDX-STORE'
        if 'decrypt' in parts:
            return 'SDX-DECRYPT'
        if 'validate' in parts:
            return 'SDX-VALIDATE'
        return None

    def process(self, msg, tx_id=None, decrypt=True):
        """Process one submission end to end.

        :param msg: the submission; the encrypted payload when ``decrypt``
            is true, otherwise the already-decrypted survey mapping.
        :param tx_id: transaction id taken from the message header, if any.
        :param decrypt: when false, skip the decrypt call and use ``msg``
            as the decrypted survey.
        :raises QuarantinableError: when the header ``tx_id`` differs from
            the one inside the survey, or when a step called from here
            raises it.
        :raises RetryableError: when a step called from here raises it.
        """
        # Bind the tx_id from the rabbit message header as we don't have
        # access to the one in the survey yet.
        self.logger = self.logger.bind(tx_id=tx_id)

        if decrypt:
            decrypted_json = self.decrypt_survey(msg)
        else:
            decrypted_json = msg

        metadata = decrypted_json.get('metadata', {})
        self.logger = self.logger.bind(user_id=metadata.get('user_id'),
                                       ru_ref=metadata.get('ru_ref'))

        if not tx_id:
            self.tx_id = decrypted_json.get('tx_id')
        elif tx_id != decrypted_json.get('tx_id'):
            # Log the header value itself: self.tx_id has not been updated
            # for this message yet and would show None or a stale id.
            self.logger.info(
                'tx_ids from decrypted_json and message header do not match. Rejecting message',
                decrypted_tx_id=decrypted_json.get('tx_id'),
                message_tx_id=tx_id)
            raise QuarantinableError
        else:
            self.tx_id = tx_id

        valid = self.validate_survey(decrypted_json)
        if not valid:
            self.logger.info(
                "Invalid survey data, skipping receipting and downstream processing"
            )
            # Invalid submissions are still stored, flagged as invalid.
            decrypted_json['invalid'] = True

        store_response_json = self.store_survey(decrypted_json)
        self.logger.info("Saved data to the database", id=store_response_json)

        if valid and self._requires_receipting(decrypted_json):
            self.send_receipt(decrypted_json)

        if valid and self._requires_downstream_processing(decrypted_json):
            self.send_notification(store_response_json)

        if valid and self._requires_dap_processing(decrypted_json):
            self.send_to_dap_queue(decrypted_json)

        # If we don't unbind these fields, their current value will be
        # retained for the next submission. This leads to incorrect values
        # being logged out in the bound fields.
        self.logger = self.logger.unbind("user_id", "ru_ref", "tx_id")

    def decrypt_survey(self, encrypted_survey):
        """POST the encrypted survey to ``settings.SDX_DECRYPT_URL``.

        :returns: the decoded JSON body of the response.
        :raises QuarantinableError: when the service answers with a client
            error.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Decrypting survey")
        response = self.remote_call(settings.SDX_DECRYPT_URL,
                                    data=encrypted_survey)
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error(
                "Survey decryption unsuccessful. Quarantining Survey.")
            raise QuarantinableError

        self.logger.info("Survey decryption successful")
        return response.json()

    def validate_survey(self, decrypted_json):
        """POST the survey to ``settings.SDX_VALIDATE_URL``.

        :returns: ``True`` when the service accepts the survey, ``False``
            when it answers with a client error.
        :raises QuarantinableError, RetryableError: propagated from
            ``remote_call`` / ``response_ok``.
        """
        self.logger.info("Validating survey")
        try:
            self.response_ok(
                self.remote_call(settings.SDX_VALIDATE_URL,
                                 json=decrypted_json))
        except ClientError:
            # If the validation fails, the message is to be marked "invalid"
            # and then stored. We don't then want to stop processing at this
            # point.
            return False

        self.logger.info("Survey validation successful")
        return True

    def store_survey(self, decrypted_json):
        """POST the survey to ``settings.SDX_RESPONSES_URL``.

        :returns: the decoded JSON body of the store response.
        :raises QuarantinableError: when the service answers with a client
            error.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Storing survey")
        response = self.remote_call(settings.SDX_RESPONSES_URL,
                                    json=decrypted_json)
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error(
                "Survey storage unsuccessful. Quarantining Survey.")
            raise QuarantinableError

        self.logger.info("Survey storage successful")
        return response.json()

    def _requires_receipting(self, decrypted_json):
        """Return ``False`` for feedback submissions, ``True`` otherwise."""
        if self._is_feedback_survey(decrypted_json):
            self.logger.info("Feedback survey, skipping receipting")
            return False
        return True

    def make_receipt(self, decrypted_json):
        """Build the receipt payload from the survey.

        :returns: a dict with ``case_id``, ``tx_id``,
            ``collection.exercise_sid``, ``metadata.ru_ref`` and
            ``metadata.user_id`` copied from the survey.
        :raises QuarantinableError: when any of those keys is missing.
        """
        try:
            receipt_json = {
                'case_id': decrypted_json['case_id'],
                'tx_id': decrypted_json['tx_id'],
                'collection': {
                    'exercise_sid': decrypted_json['collection']['exercise_sid']
                },
                'metadata': {
                    'ru_ref': decrypted_json['metadata']['ru_ref'],
                    'user_id': decrypted_json['metadata']['user_id']
                }
            }
        except KeyError:
            self.logger.exception("Unsuccessful publish, missing key values")
            raise QuarantinableError

        return receipt_json

    def _requires_dap_processing(self, decrypted_json):
        """Return ``True`` when the submission must be sent to DAP.

        Feedback submissions are never sent; otherwise the decision depends
        only on ``survey_id`` being in the hard-coded list below.
        """
        if self._is_feedback_survey(decrypted_json):
            self.logger.info("Feedback survey, skipping sending to DAP")
            return False

        if decrypted_json.get("survey_id") in [
                "007", "023", "134", "147", "281", "283", "lms", "census"
        ]:  # low carbon, RSI, MWSS, EPE, Dtrades
            self.logger.info("Sending to DAP",
                             survey_id=decrypted_json.get("survey_id"))
            return True

        return False

    def make_dap_data(self, decrypted_json):
        """Creates the json payload required by minifi to send the submission to dap

        The stored submission is fetched from
        ``settings.SDX_RESPONSES_URL/<tx_id>`` so that its ``Content-Length``
        and ``Content-MD5`` response headers can be copied into the payload.

        :raises QuarantinableError: when the fetch returns a client error or
            a required survey key / response header is missing.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Creating dap data")
        response = self.remote_call(
            f"{settings.SDX_RESPONSES_URL}/{decrypted_json['tx_id']}")
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error("Survey retrieval failed. Quarantining Survey.")
            raise QuarantinableError

        try:
            description = "{} survey response for period {} sample unit {}".format(
                decrypted_json['survey_id'],
                decrypted_json['collection']['period'],
                decrypted_json['metadata']['ru_ref'])

            dap_json = {
                'version': '1',
                'files': [{
                    'name': f"{decrypted_json['tx_id']}.json",
                    'URL': f"{settings.SDX_RESPONSES_URL}/{decrypted_json['tx_id']}",
                    'sizeBytes': response.headers['Content-Length'],
                    'md5sum': response.headers['Content-MD5']
                }],
                'sensitivity': 'High',
                'sourceName': settings.DAP_SOURCE_NAME,
                'manifestCreated': self._get_formatted_current_utc(),
                'description': description,
                'iterationL1': decrypted_json['collection']['period'],
                'dataset': decrypted_json['survey_id'],
                'schemaversion': '1'
            }
        except KeyError:
            self.logger.exception("Unsuccesful publish, missing key values")
            raise QuarantinableError

        self.logger.info("Created dap data")
        return dap_json

    def _get_formatted_current_utc(self):
        """
        Returns a formatted utc date with only 3 sub-second digits
        (milliseconds) as opposed to the usual 6 that python provides.
        Additionally, we provide the Zulu time indicator (Z) at the end to
        indicate it being UTC time. This is done for consistency with
        timestamps provided in other languages.
        The format the time is returned is YYYY-mm-ddTHH:MM:SS.fffZ
        (e.g., 2018-10-10T08:42:24.737Z)
        """
        date_time = datetime.utcnow()
        # %f yields six digits of microseconds; keep the first three.
        milliseconds = date_time.strftime("%f")[:3]
        return f"{date_time.strftime('%Y-%m-%dT%H:%M:%S')}.{milliseconds}Z"

    def _requires_downstream_processing(self, decrypted_json):
        """Return ``True`` when a downstream notification must be sent.

        Submissions with ``version == "0.0.2"`` are skipped. For survey
        ``"283"`` only feedback submissions go downstream. Everything else
        is sent.
        """
        if decrypted_json.get("version") == "0.0.2":
            survey_id = decrypted_json.get("survey_id")
            self.logger.info("Skipping downstream processing",
                             survey_id=survey_id)
            return False

        if decrypted_json.get("survey_id") == "283":
            if self._is_feedback_survey(decrypted_json):
                return True
            self.logger.info(
                "Covid-19 survey, skipping downstream processing")
            return False

        return True

    @staticmethod
    def _is_feedback_survey(decrypted_json):
        """Return ``True`` when the survey's ``type`` contains "feedback".

        The value is passed through ``str`` first, so a missing ``type``
        (``None``) is simply not a feedback survey.
        """
        return "feedback" in str(decrypted_json.get("type"))

    def send_receipt(self, decrypted_json):
        """Build a receipt and publish it through ``rrm_publisher``.

        :raises QuarantinableError: when the survey has no ``survey_id`` or
            ``make_receipt`` fails.
        :raises RetryableError: when publishing raises
            ``PublishMessageError``.
        """
        if not decrypted_json.get("survey_id"):
            self.logger.error("No survey id")
            raise QuarantinableError

        self.logger.info("Receipting survey")
        receipt = self.make_receipt(decrypted_json)

        try:
            self.logger.info("About to publish receipt into rrm queue")
            self.logger.debug(str(receipt))
            self.rrm_publisher.publish(
                dumps(receipt),
                headers={'tx_id': decrypted_json['tx_id']},
                secret=settings.SDX_COLLECT_SECRET)
            self.logger.info("Receipt published")
        except PublishMessageError:
            self.logger.exception("Unsuccesful publish")
            raise RetryableError

    def send_notification(self, store_response_json):
        """Publish the store response, JSON-encoded, to the notifications queue.

        :raises RetryableError: when publishing raises
            ``PublishMessageError``.
        """
        self.logger.info("Sending to downstream")
        try:
            self.logger.info("About to publish notification to queue")
            self.notifications.publish_message(
                json.dumps(store_response_json),
                headers={'tx_id': self.tx_id})
        except PublishMessageError:
            self.logger.exception("Unable to queue response notification")
            raise RetryableError

    def send_to_dap_queue(self, decrypted_json):
        """Build the DAP payload and publish it to the DAP queue.

        :raises QuarantinableError: propagated from ``make_dap_data``.
        :raises RetryableError: when publishing raises
            ``PublishMessageError`` or ``make_dap_data`` raises it.
        """
        self.logger.info("Sending data to dap queue")
        message = self.make_dap_data(decrypted_json)
        try:
            self.logger.info("Publishing data to dap queue")
            self.dap.publish_message(dumps(message),
                                     headers={'tx_id': self.tx_id})
        except PublishMessageError:
            self.logger.exception("Failed to publish to dap queue")
            raise RetryableError

        self.logger.info("Successfully published to dap queue")

    def remote_call(self, request_url, json=None, data=None):
        """Call ``request_url`` through the shared ``session``.

        A truthy ``json`` is POSTed as JSON; otherwise a truthy ``data`` is
        POSTed as the request body; otherwise a GET is issued.

        :returns: the response object returned by ``session``.
        :raises RetryableError: on ``MaxRetryError`` or ``ConnectionError``.
        """
        service = self.service_name(request_url)

        try:
            self.logger.info("Calling service",
                             request_url=request_url,
                             service=service)
            if json:
                return session.post(request_url, json=json, verify=True)
            if data:
                return session.post(request_url, data=data, verify=True)
            return session.get(request_url, verify=True)
        except MaxRetryError:
            self.logger.error("Max retries exceeded (5)",
                              request_url=request_url)
            raise RetryableError
        except ConnectionError:
            self.logger.error("Connection error occurred. Retrying")
            raise RetryableError

    @staticmethod
    def _payload_has_invalid_character(res):
        """Return ``True`` when the response JSON flags an invalid character.

        A body that is not JSON, or not a JSON object, is treated as "not
        flagged" so the caller falls back to a plain client error.
        """
        try:
            return bool(res.json().get('contains_invalid_character'))
        except (ValueError, AttributeError):
            return False

    def response_ok(self, res):
        """Check a service response and raise for anything but success.

        :param res: response object exposing ``url``, ``status_code`` and
            ``json()``.
        :returns: ``None`` for status 200 or 201.
        :raises QuarantinableError: for a 4xx response whose JSON body has a
            truthy ``contains_invalid_character``.
        :raises ClientError: for any other 4xx response.
        :raises RetryableError: for every other status code.
        """
        request_url = res.url
        service = self.service_name(request_url)

        # bind() returns a new logger; keep it local so request-specific
        # fields do not leak into self.logger.
        res_logger = self.logger.bind(request_url=res.url,
                                      status=res.status_code)

        if res.status_code in (200, 201):
            res_logger.info("Returned from service",
                            response="ok",
                            service=service)
            return

        if 400 <= res.status_code < 500:
            if self._payload_has_invalid_character(res):
                # Use the bound logger so the submission's context
                # (tx_id, request_url, status) is part of this entry.
                res_logger.error(
                    "Invalid character found in payload, quarantining submission"
                )
                raise QuarantinableError
            res_logger.error("Returned from service",
                             response="client error",
                             service=service)
            raise ClientError

        res_logger.error("Returned from service",
                         response="service error",
                         service=service)
        raise RetryableError
def test_roundtrip_string_message(self):
    # Encrypt a str payload, decrypt the resulting token with the same
    # secret, and expect the original text back.
    plaintext = "Test string"
    key = base64.b64encode(b"x" * 32)
    encrypted = PrivatePublisher.encrypt(plaintext, secret=key)
    decrypted = PrivatePublisher.decrypt(encrypted, secret=key)
    self.assertEqual(plaintext, decrypted)
def __init__(self, logger=logger):
    """Store the logger, clear ``tx_id`` and create the three publishers.

    :param logger: logger to use; defaults to the module-level ``logger``.
    """
    self.logger = logger
    self.tx_id = None

    # Every publisher talks to the same broker URLs; only the queue differs.
    rabbit_urls = settings.RABBIT_URLS
    self.rrm_publisher = PrivatePublisher(
        rabbit_urls,
        settings.RABBIT_RRM_RECEIPT_QUEUE,
    )
    self.notifications = QueuePublisher(
        rabbit_urls,
        settings.RABBIT_SURVEY_QUEUE,
    )
    self.dap = QueuePublisher(
        rabbit_urls,
        settings.RABBIT_DAP_QUEUE,
    )
class ResponseProcessor:
    """Drive one survey submission through the SDX processing steps.

    ``process`` decrypts, validates and stores a submission through HTTP
    calls to the services configured in ``settings``, and then publishes a
    receipt, a downstream notification and a DAP message whenever the
    corresponding ``_requires_*`` predicate allows it.
    """

    @staticmethod
    def options():
        """Return options read from the environment.

        :returns: a dict that contains ``"secret"`` (the ASCII-encoded value
            of the ``SDX_COLLECT_SECRET`` environment variable) only when
            that variable is set; otherwise an empty dict.
        """
        rv = {}
        secret = os.getenv("SDX_COLLECT_SECRET")
        if secret is not None:
            rv["secret"] = secret.encode("ascii")
        return rv

    def __init__(self, logger=logger):
        """Store the logger, clear ``tx_id`` and create the three publishers.

        :param logger: logger to use; defaults to the module-level ``logger``.
        """
        self.logger = logger
        self.tx_id = None
        self.rrm_publisher = PrivatePublisher(
            settings.RABBIT_URLS, settings.RABBIT_RRM_RECEIPT_QUEUE)
        self.notifications = QueuePublisher(
            settings.RABBIT_URLS, settings.RABBIT_SURVEY_QUEUE)
        self.dap = QueuePublisher(
            settings.RABBIT_URLS, settings.RABBIT_DAP_QUEUE)

    def service_name(self, url=None):
        """Map a request URL to a service label used in log output.

        :param url: URL string; its ``/``-separated segments are inspected.
        :returns: ``'SDX-STORE'``, ``'SDX-DECRYPT'`` or ``'SDX-VALIDATE'``
            when the URL contains a ``responses``, ``decrypt`` or
            ``validate`` segment respectively (checked in that order);
            ``None`` when nothing matches or ``url`` has no ``split``
            method (the latter is logged).
        """
        try:
            parts = url.split('/')
        except AttributeError:
            self.logger.exception("No valid service name")
            return None

        if 'responses' in parts:
            return 'SDX-STORE'
        if 'decrypt' in parts:
            return 'SDX-DECRYPT'
        if 'validate' in parts:
            return 'SDX-VALIDATE'
        return None

    def process(self, msg, tx_id=None):
        """Process one encrypted submission end to end.

        :param msg: the encrypted submission payload.
        :param tx_id: transaction id taken from the message header, if any.
        :raises QuarantinableError: when the header ``tx_id`` differs from
            the one inside the survey, or when a step called from here
            raises it.
        :raises RetryableError: when a step called from here raises it.
        """
        decrypted_json = self.decrypt_survey(msg)

        metadata = decrypted_json.get('metadata', {})
        self.logger = self.logger.bind(
            user_id=metadata.get('user_id'), ru_ref=metadata.get('ru_ref'))

        if not tx_id:
            self.tx_id = decrypted_json.get('tx_id')
        elif tx_id != decrypted_json.get('tx_id'):
            # Log the header value itself: self.tx_id has not been updated
            # for this message yet and would show None or a stale id.
            self.logger.info(
                'tx_ids from decrypted_json and message header do not match. Rejecting message',
                decrypted_tx_id=decrypted_json.get('tx_id'),
                message_tx_id=tx_id)
            raise QuarantinableError
        else:
            self.tx_id = tx_id

        self.logger = self.logger.bind(tx_id=self.tx_id)

        valid = self.validate_survey(decrypted_json)
        if not valid:
            self.logger.info(
                "Invalid survey data, skipping receipting and downstream processing")
            # Invalid submissions are still stored, flagged as invalid.
            decrypted_json['invalid'] = True

        self.store_survey(decrypted_json)

        if valid and self._requires_receipting(decrypted_json):
            self.send_receipt(decrypted_json)

        if valid and self._requires_downstream_processing(decrypted_json):
            self.send_notification()

        if valid and self._requires_dap_processing(decrypted_json):
            self.send_to_dap_queue(decrypted_json)

        # unbind() returns a new logger rather than mutating in place, so
        # the result must be stored; otherwise these fields stay bound and
        # are logged with stale values for the next submission.
        self.logger = self.logger.unbind("user_id", "ru_ref", "tx_id")

    def decrypt_survey(self, encrypted_survey):
        """POST the encrypted survey to ``settings.SDX_DECRYPT_URL``.

        :returns: the decoded JSON body of the response.
        :raises QuarantinableError: when the service answers with a client
            error.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Decrypting survey")
        response = self.remote_call(settings.SDX_DECRYPT_URL,
                                    data=encrypted_survey)
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error(
                "Survey decryption unsuccessful. Quarantining Survey.")
            raise QuarantinableError

        self.logger.info("Survey decryption successful")
        return response.json()

    def validate_survey(self, decrypted_json):
        """POST the survey to ``settings.SDX_VALIDATE_URL``.

        :returns: ``True`` when the service accepts the survey, ``False``
            when it answers with a client error.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Validating survey")
        try:
            self.response_ok(
                self.remote_call(settings.SDX_VALIDATE_URL,
                                 json=decrypted_json))
        except ClientError:
            # If the validation fails, the message is to be marked "invalid"
            # and then stored. We don't then want to stop processing at this
            # point.
            return False

        self.logger.info("Survey validation successful")
        return True

    def store_survey(self, decrypted_json):
        """POST the survey to ``settings.SDX_RESPONSES_URL``.

        :returns: the response object returned by ``remote_call``.
        :raises QuarantinableError: when the service answers with a client
            error.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Storing survey")
        response = self.remote_call(settings.SDX_RESPONSES_URL,
                                    json=decrypted_json)
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error(
                "Survey storage unsuccessful. Quarantining Survey.")
            raise QuarantinableError

        self.logger.info("Survey storage successful")
        return response

    def _requires_receipting(self, decrypted_json):
        """Return ``False`` for feedback submissions, ``True`` otherwise."""
        if self._is_feedback_survey(decrypted_json):
            self.logger.info("Feedback survey, skipping receipting")
            return False
        return True

    def make_receipt(self, decrypted_json):
        """Build the receipt payload from the survey.

        :returns: a dict with ``case_id``, ``tx_id``,
            ``collection.exercise_sid``, ``metadata.ru_ref`` and
            ``metadata.user_id`` copied from the survey.
        :raises QuarantinableError: when any of those keys is missing.
        """
        try:
            receipt_json = {
                'case_id': decrypted_json['case_id'],
                'tx_id': decrypted_json['tx_id'],
                'collection': {
                    'exercise_sid': decrypted_json['collection']['exercise_sid']
                },
                'metadata': {
                    'ru_ref': decrypted_json['metadata']['ru_ref'],
                    'user_id': decrypted_json['metadata']['user_id']
                }
            }
        except KeyError:
            self.logger.exception("Unsuccesful publish, missing key values")
            raise QuarantinableError

        return receipt_json

    def _requires_dap_processing(self, decrypted_json):
        """Return ``True`` when the submission must be sent to DAP.

        Feedback submissions are never sent; otherwise the decision depends
        only on ``survey_id`` being in the hard-coded list below.
        """
        if self._is_feedback_survey(decrypted_json):
            self.logger.info("Feedback survey, skipping sending to DAP")
            return False

        if decrypted_json.get("survey_id") in ["023", "281", "lms", "census"]:  # RSI, Dtrades
            self.logger.info("Sending to DAP",
                             survey_id=decrypted_json.get("survey_id"))
            return True

        return False

    def make_dap_data(self, decrypted_json):
        """Build the payload describing the stored submission for DAP.

        The stored submission is fetched from
        ``settings.SDX_RESPONSES_URL/<tx_id>`` so that its ``Content-Length``
        and ``Content-MD5`` response headers can be copied into the payload.

        :raises QuarantinableError: when the fetch returns a client error or
            a required survey key / response header is missing.
        :raises RetryableError: propagated from ``remote_call`` /
            ``response_ok``.
        """
        self.logger.info("Creating dap data")
        response = self.remote_call(
            '{}/{}'.format(settings.SDX_RESPONSES_URL, decrypted_json['tx_id']))
        try:
            self.response_ok(response)
        except ClientError:
            self.logger.error("Survey retrieval failed. Quarantining Survey.")
            raise QuarantinableError

        try:
            description = "{} survey response for period {} sample unit {}".format(
                decrypted_json['survey_id'],
                decrypted_json['collection']['period'],
                decrypted_json['metadata']['ru_ref'])

            dap_json = {
                'version': '1',
                'files': [{
                    'name': '{}.json'.format(decrypted_json['tx_id']),
                    'URL': '{}/{}'.format(settings.SDX_RESPONSES_URL,
                                          decrypted_json['tx_id']),
                    'sizeBytes': response.headers['Content-Length'],
                    'md5sum': response.headers['Content-MD5']
                }],
                'sensitivity': 'High',
                'sourceName': settings.DAP_SOURCE_NAME,
                'manifestCreated': self._get_formatted_current_utc(),
                'description': description,
                'iterationL1': decrypted_json['collection']['period'],
                'dataset': decrypted_json['survey_id'],
                'schemaversion': '1'
            }
        except KeyError:
            self.logger.exception("Unsuccesful publish, missing key values")
            raise QuarantinableError

        self.logger.info("Created dap data")
        return dap_json

    def _get_formatted_current_utc(self):
        """
        Returns a formatted utc date with only 3 sub-second digits
        (milliseconds) as opposed to the usual 6 that python provides.
        Additionally, we provide the Zulu time indicator (Z) at the end to
        indicate it being UTC time. This is done for consistency with
        timestamps provided in other languages.
        The format the time is returned is YYYY-mm-ddTHH:MM:SS.fffZ
        (e.g., 2018-10-10T08:42:24.737Z)
        """
        date_time = datetime.utcnow()
        # %f yields six digits of microseconds; keep the first three.
        milliseconds = date_time.strftime("%f")[:3]
        return '{}.{}Z'.format(date_time.strftime("%Y-%m-%dT%H:%M:%S"),
                               milliseconds)

    def _requires_downstream_processing(self, decrypted_json):
        """Return ``True`` when a downstream notification must be sent.

        Feedback submissions and submissions with ``version == "0.0.2"``
        are skipped; everything else is sent.
        """
        if self._is_feedback_survey(decrypted_json):
            self.logger.info("Feedback survey, skipping downstream processing")
            return False

        if decrypted_json.get("version") == "0.0.2":
            survey_id = decrypted_json.get("survey_id")
            self.logger.info("Skipping downstream processing",
                             survey_id=survey_id)
            return False

        return True

    @staticmethod
    def _is_feedback_survey(decrypted_json):
        """Return ``True`` when the survey's ``type`` contains "feedback".

        The value is passed through ``str`` first, so a missing ``type``
        (``None``) is simply not a feedback survey.
        """
        return "feedback" in str(decrypted_json.get("type"))

    def send_receipt(self, decrypted_json):
        """Build a receipt and publish it through ``rrm_publisher``.

        :raises QuarantinableError: when the survey has no ``survey_id`` or
            ``make_receipt`` fails.
        :raises RetryableError: when publishing raises
            ``PublishMessageError``.
        """
        if not decrypted_json.get("survey_id"):
            self.logger.error("No survey id")
            raise QuarantinableError

        self.logger.info("Receipting survey")
        receipt = self.make_receipt(decrypted_json)

        try:
            self.logger.info("About to publish receipt into rrm queue")
            self.logger.debug(str(receipt))
            self.rrm_publisher.publish(
                dumps(receipt),
                headers={'tx_id': decrypted_json['tx_id']},
                secret=settings.SDX_COLLECT_SECRET)
            self.logger.info("Receipt published")
        except PublishMessageError:
            self.logger.exception("Unsuccesful publish")
            raise RetryableError

    def send_notification(self):
        """Publish ``self.tx_id`` to the notifications queue.

        :raises RetryableError: when publishing raises
            ``PublishMessageError``.
        """
        self.logger.info("Sending to downstream")
        try:
            self.logger.info("About to publish notification to queue")
            self.notifications.publish_message(
                self.tx_id, headers={'tx_id': self.tx_id})
        except PublishMessageError as e:
            self.logger.error("Unable to queue response notification", error=e)
            raise RetryableError

    def send_to_dap_queue(self, decrypted_json):
        """Build the DAP payload and publish it to the DAP queue.

        :raises QuarantinableError: propagated from ``make_dap_data``.
        :raises RetryableError: when publishing raises
            ``PublishMessageError`` or ``make_dap_data`` raises it.
        """
        self.logger.info("Sending data to dap queue")
        message = self.make_dap_data(decrypted_json)
        try:
            self.logger.info("Publishing data to dap queue")
            self.dap.publish_message(
                dumps(message), headers={'tx_id': self.tx_id})
        except PublishMessageError:
            self.logger.exception("Failed to publish to dap queue")
            raise RetryableError

        self.logger.info("Successfully published to dap queue")

    def remote_call(self, request_url, json=None, data=None, headers=None,
                    verify=True, auth=None):
        """Call ``request_url`` through the shared ``session``.

        A truthy ``json`` is POSTed as JSON; otherwise a truthy ``data`` is
        POSTed as the request body; otherwise a GET is issued. ``headers``,
        ``verify`` and ``auth`` are forwarded unchanged in every case.

        :returns: the response object returned by ``session``.
        :raises RetryableError: on ``MaxRetryError`` or ``ConnectionError``.
        """
        service = self.service_name(request_url)

        try:
            self.logger.info("Calling service",
                             request_url=request_url,
                             service=service)
            if json:
                return session.post(request_url, json=json, headers=headers,
                                    verify=verify, auth=auth)
            if data:
                return session.post(request_url, data=data, headers=headers,
                                    verify=verify, auth=auth)
            return session.get(request_url, headers=headers, verify=verify,
                               auth=auth)
        except MaxRetryError:
            self.logger.error("Max retries exceeded (5)",
                              request_url=request_url)
            raise RetryableError
        except ConnectionError:
            self.logger.error("Connection error occurred. Retrying")
            raise RetryableError

    def response_ok(self, res):
        """Check a service response and raise for anything but success.

        :param res: response object exposing ``url`` and ``status_code``.
        :returns: ``None`` for status 200 or 201.
        :raises ClientError: for a 4xx response.
        :raises RetryableError: for every other status code.
        """
        request_url = res.url
        service = self.service_name(request_url)

        # bind() returns a new logger instead of mutating the receiver, so
        # the result has to be kept for the fields to appear in the output.
        # Keeping it local also stops request-specific fields leaking into
        # self.logger.
        res_logger = self.logger.bind(request_url=res.url,
                                      status=res.status_code)

        if res.status_code in (200, 201):
            res_logger.info("Returned from service",
                            response="ok",
                            service=service)
            return

        if 400 <= res.status_code < 500:
            res_logger.error("Returned from service",
                             response="client error",
                             service=service)
            raise ClientError

        res_logger.error("Returned from service",
                         response="service error",
                         service=service)
        raise RetryableError