def is_request_complete(self) -> bool:
    """
    Report whether this request has finished.

    The check probes the bucket named by the MATRIX_RESULTS_BUCKET
    environment variable for this request's ``s3_results_key``.

    :return: bool result of the bucket handler's ``exists`` check
    """
    bucket_name = os.environ['MATRIX_RESULTS_BUCKET']
    handler = S3Handler(bucket_name)
    key_is_present = handler.exists(self.s3_results_key)
    return key_is_present
Example #2
0
    def test_is_request_complete(self):
        """The tracker reports incomplete before, and complete after, a result object is stored."""
        tracker = self.request_tracker
        self.assertFalse(tracker.is_request_complete())

        results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])
        # Store an empty object beneath the tracker's results key.
        result_object_key = f"{tracker.s3_results_key}/{self.request_id}.{tracker.format}"
        results_bucket.store_content_in_s3(result_object_key, "")

        self.assertTrue(tracker.is_request_complete())
Example #3
0
    def __init__(self, request_id: str):
        """
        Wire up the handlers and configuration used for one request.

        :param request_id: ID of the request; also set as the logger's correlation ID
        """
        Logging.set_correlation_id(logger, value=request_id)

        self.request_id = request_id

        # Per-request state tracking and AWS service handlers.
        self.request_tracker = RequestTracker(request_id)
        self.dynamo_handler = DynamoHandler()
        self.sqs_handler = SQSHandler()

        # Deployment configuration objects.
        self.infra_config = MatrixInfraConfig()
        self.redshift_config = MatrixRedshiftConfig()

        # Bucket names come from the environment; only the query bucket gets a handler here.
        self.query_results_bucket = os.environ['MATRIX_QUERY_RESULTS_BUCKET']
        query_bucket_name = os.environ['MATRIX_QUERY_BUCKET']
        self.s3_handler = S3Handler(query_bucket_name)
    def lookup_cached_result(self) -> str:
        """
        Find an existing matrix result stored under this request's results prefix.

        Lists the results bucket (MATRIX_RESULTS_BUCKET) beneath
        ``"<s3_results_prefix>/"`` and reports the first listed object.

        :return: S3 key of the first listed object, or "" when nothing is listed
        """
        bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])
        listing = bucket.ls(f"{self.s3_results_prefix}/")

        # The trailing slash in the listing prefix restricts matches to keys below the prefix.
        return listing[0]['Key'] if len(listing) > 0 else ""
Example #5
0
def invalidate_cache_entries(request_ids: list = None,
                             request_hashes: list = None):
    """
    Invalidates a list of request IDs and/or request hashes.
    Invalidation refers to the invalidation of the request in DynamoDB
    and the deletion of the associated matrix in S3.

    Invalidated requests will return an `ERROR` state and explanation
    to the user via the GET endpoint.

    Request hashes are resolved to a list of associated request IDs
    (restricted to the deployment's current data version).

    Either argument may be omitted; an omitted argument is treated as an
    empty list. The caller's lists are never modified.

    :param request_ids: list of request IDs to invalidate
    :param request_hashes: list of request hashes to invalidate
    """
    # Work on private copies: IDs resolved from hashes are appended below, and
    # that must not leak into the caller's list. None means "nothing supplied".
    request_ids = list(request_ids) if request_ids is not None else []
    request_hashes = list(request_hashes) if request_hashes is not None else []

    print(f"Invalidating request IDs: {request_ids}")
    print(f"Invalidating request hashes: {request_hashes}")

    deployment_stage = os.environ['DEPLOYMENT_STAGE']
    dynamo_handler = DynamoHandler()
    data_version = dynamo_handler.get_table_item(
        table=DynamoTable.DEPLOYMENT_TABLE,
        key=deployment_stage)[DeploymentTableField.CURRENT_DATA_VERSION.value]

    # Resolve each hash to the request IDs recorded for it at the current data version.
    for request_hash in request_hashes:
        items = dynamo_handler.filter_table_items(
            table=DynamoTable.REQUEST_TABLE,
            attrs={
                RequestTableField.REQUEST_HASH.value: request_hash,
                RequestTableField.DATA_VERSION.value: data_version
            })
        request_ids.extend(item[RequestTableField.REQUEST_ID.value] for item in items)

    # Mark every request as errored and collect its results key for deletion.
    s3_keys_to_delete = []
    for request_id in request_ids:
        print(f"Writing deletion error to {request_id} in DynamoDB.")
        request_tracker = RequestTracker(request_id=request_id)
        request_tracker.log_error(
            "This request has been deleted and is no longer available for download. "
            "Please generate a new matrix at POST /v1/matrix.")
        s3_keys_to_delete.append(request_tracker.s3_results_key)

    print(f"Deleting matrices at the following S3 keys: {s3_keys_to_delete}")
    s3_results_bucket_handler = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])
    deleted_objects = s3_results_bucket_handler.delete_objects(
        s3_keys_to_delete)

    deleted_keys = [
        deleted_object['Key'] for deleted_object in deleted_objects
    ]

    print(
        f"Successfully deleted the following matrices {deleted_keys}. ({len(deleted_keys)}/{len(s3_keys_to_delete)})"
    )
Example #6
0
    def test_lookup_cached_result(self, mock_s3_results_prefix):
        """
        lookup_cached_result returns "" when only the bare prefix key exists,
        and the first result key once objects exist beneath the prefix.
        """
        prefix = "test_prefix"
        content = "test_content"
        mock_s3_results_prefix.return_value = prefix
        results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])

        with self.subTest("Do not match in S3 'directories'"):
            # An object stored at exactly the prefix must not count as a cached result.
            results_bucket.store_content_in_s3(prefix, content)
            cached_key = self.request_tracker.lookup_cached_result()
            self.assertEqual(cached_key, "")

        with self.subTest("Successfully retrieve a result key"):
            for result_name in ("test_result_1", "test_result_2"):
                results_bucket.store_content_in_s3(f"{prefix}/{result_name}", content)
            cached_key = self.request_tracker.lookup_cached_result()
            self.assertEqual(cached_key, "test_prefix/test_result_1")
    def is_expired(self):
        """
        Decide whether this request has expired.

        A request counts as expired when it was created more than 30 days ago
        and no object exists at its results key in the results bucket.
        When that is the case an explanatory error is logged on the request
        before returning.

        :return: bool
        """
        results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])

        created_at = date.to_datetime(self.creation_date)
        expiration_cutoff = date.get_datetime_now() - timedelta(days=30)
        past_expiration = created_at < expiration_cutoff

        matrix_missing = not results_bucket.exists(self.s3_results_key)
        expired = matrix_missing and past_expiration

        if not expired:
            return expired

        self.log_error(
            "This request has expired after 30 days and is no longer available for download. "
            "A new matrix can be generated by resubmitting the POST request to /v1/matrix."
        )
        return expired
Example #8
0
def get_matrix(request_id: str):
    """
    Report the status of a matrix request.

    :param request_id: ID of the request to look up
    :return: tuple of (response body dict, HTTP status code). The status code
             is "not found" when the request ID is unknown; otherwise it is
             "ok" and the body's 'status' field carries the request state
             (in progress, failed, complete or expired).
    """
    # There are a few cases to handle here. First, if the request_id is not in
    # the state table at all, then this id has never been made and we should
    # 404.
    request_tracker = RequestTracker(request_id)
    if not request_tracker.is_initialized:
        return ({
            'message': f"Unable to find job with request ID {request_id}."
        }, requests.codes.not_found)

    in_progress_response = _matrix_status_response(
        request_id,
        MatrixRequestStatus.IN_PROGRESS.value,
        f"Request {request_id} has been accepted and is currently being "
        f"processed. Please try again later.")

    # if the request tracker is not able to retrieve the format,
    # it means that the driver has not created the relevant entry in the output table yet.
    try:
        matrix_format = request_tracker.format
    except MatrixException:
        return in_progress_response

    # Failed case
    if request_tracker.error:
        return _matrix_status_response(request_id,
                                       MatrixRequestStatus.FAILED.value,
                                       request_tracker.error)

    # Check for failed batch conversion job
    if request_tracker.batch_job_status == "FAILED":
        # Record the failure first so the error read back below is the one just logged.
        request_tracker.log_error(
            "The matrix conversion as a part of the request has failed. "
            "Please retry or contact an hca admin for help.")
        return _matrix_status_response(request_id,
                                       MatrixRequestStatus.FAILED.value,
                                       request_tracker.error)

    # Complete case
    if request_tracker.is_request_complete():
        matrix_results_bucket = os.environ['MATRIX_RESULTS_BUCKET']
        matrix_results_handler = S3Handler(matrix_results_bucket)

        # Loom results are stored as-is; csv and mtx results are stored zipped.
        matrix_key = ""
        if matrix_format == MatrixFormat.LOOM.value:
            matrix_key = f"{request_tracker.s3_results_prefix}/{request_id}.{matrix_format}"
        elif matrix_format in (MatrixFormat.CSV.value, MatrixFormat.MTX.value):
            matrix_key = f"{request_tracker.s3_results_prefix}/{request_id}.{matrix_format}.zip"

        if matrix_results_handler.size(matrix_key):
            matrix_location = f"https://s3.amazonaws.com/{matrix_results_bucket}/{matrix_key}"
            message = (
                f"Request {request_id} has successfully completed. "
                f"The resultant expression matrix is available for download at "
                f"{matrix_location}.")
        else:
            # A falsy size is reported to the user as "no cells matched"; no URL is returned.
            matrix_location = ""
            message = (
                f"Request {request_id} has successfully completed. "
                f"But, there were no cells associated with this request and "
                f"species {request_tracker.genus_species.value}")

        return _matrix_status_response(request_id,
                                       MatrixRequestStatus.COMPLETE.value,
                                       message,
                                       matrix_url=matrix_location)

    # Expired case
    # NOTE(review): `is_expired` is read as an attribute, not called. If
    # RequestTracker defines it as a plain method rather than a property, this
    # branch is always taken and the cases below are unreachable - confirm.
    if request_tracker.is_expired:
        return _matrix_status_response(request_id,
                                       MatrixRequestStatus.EXPIRED.value,
                                       request_tracker.error)

    # Timeout case
    if request_tracker.timeout:
        return _matrix_status_response(request_id,
                                       MatrixRequestStatus.FAILED.value,
                                       request_tracker.error)

    return in_progress_response


def _matrix_status_response(request_id: str, status: str, message: str, matrix_url: str = ""):
    """Build the (body, HTTP ok) response tuple shared by every known-request status."""
    return ({
        'request_id': request_id,
        'status': status,
        'matrix_url': matrix_url,
        'eta': "",
        'message': message
    }, requests.codes.ok)
Example #9
0
 def setUp(self):
     """Create the queries bucket, a handler bound to it, and a fresh random request ID."""
     super().setUp()
     self.create_s3_queries_bucket()

     query_bucket_name = os.environ['MATRIX_QUERY_BUCKET']
     self.s3_handler = S3Handler(query_bucket_name)

     # A random UUID keeps each test's request ID unique.
     self.request_id = str(uuid.uuid4())
    def test_invalidate_cache_entries(self, mock_put_metric_data):
        """
        Setup:
        - Create four request ids mapping to two request hashes
        - Invalidate hash 1 (ids 1, 2) and id 3
        - Verify ids 1, 2 and 3 have been invalidated
        - Verify id 4 has not been invalidated
        """
        test_format = "test_format"
        test_content = "test_content"
        request_hash_1 = "test_hash_1"
        request_hash_2 = "test_hash_2"

        # (request id, request hash) pairs, in creation order.
        fixtures = [
            ("test_id_1", request_hash_1),
            ("test_id_2", request_hash_1),
            ("test_id_3", request_hash_2),
            ("test_id_4", request_hash_2),
        ]
        request_ids = [request_id for request_id, _ in fixtures]
        s3_keys = [
            f"0/{request_hash}/{request_id}.{test_format}"
            for request_id, request_hash in fixtures
        ]

        dynamo_handler = DynamoHandler()
        for request_id in request_ids:
            dynamo_handler.create_request_table_entry(request_id, test_format)

        for request_id, request_hash in fixtures:
            dynamo_handler.set_table_field_with_value(
                table=DynamoTable.REQUEST_TABLE,
                key=request_id,
                field_enum=RequestTableField.REQUEST_HASH,
                field_value=request_hash)

        s3_results_bucket_handler = S3Handler(
            os.environ['MATRIX_RESULTS_BUCKET'])
        for s3_key in s3_keys:
            s3_results_bucket_handler.store_content_in_s3(s3_key, test_content)

        # Every matrix must be present before invalidation.
        for s3_key in s3_keys:
            self.assertTrue(s3_results_bucket_handler.exists(s3_key))

        invalidate_cache_entries(request_ids=[request_ids[2]],
                                 request_hashes=[request_hash_1])

        errors = [
            dynamo_handler.get_table_item(
                table=DynamoTable.REQUEST_TABLE,
                key=request_id)[RequestTableField.ERROR_MESSAGE.value]
            for request_id in request_ids
        ]

        # Matrices for ids 1-3 are gone; id 4's matrix survives.
        for s3_key in s3_keys[:3]:
            self.assertFalse(s3_results_bucket_handler.exists(s3_key))
        self.assertTrue(s3_results_bucket_handler.exists(s3_keys[3]))

        # Ids 1-3 carry an error message; id 4 still holds the value 0.
        for error in errors[:3]:
            self.assertNotEqual(error, 0)
        self.assertEqual(errors[3], 0)
 def __init__(self):
     """Instantiate the service handlers and infrastructure config this object works with."""
     self.sqs_handler = SQSHandler()

     # The S3 handler is bound to the bucket named by MATRIX_QUERY_BUCKET.
     query_bucket_name = os.environ["MATRIX_QUERY_BUCKET"]
     self.s3_handler = S3Handler(query_bucket_name)

     self.batch_handler = BatchHandler()
     self.redshift_handler = RedshiftHandler()
     self.matrix_infra_config = MatrixInfraConfig()
Example #12
0
    def run(self, max_loops=None):
        """
        Poll the query job queue and run the query described by each message.

        On every iteration only the first received message is handled. Its JSON
        body supplies 'request_id', 's3_obj_key' and 'type'. The query text is
        loaded from S3, run as a read-only Redshift transaction, and the
        message is then deleted from the queue. Afterwards:

        - for a CELL query with a cached result, the cached object is copied to
          this request's results key and nothing more is done for the message;
        - otherwise the QUERY subtask is marked complete and, once the request
          is ready for conversion, a batch conversion job is scheduled and its
          ID recorded.

        Any exception raised during that processing is logged on the request,
        the payload is forwarded to the deadletter queue, and the message is
        deleted from the job queue.

        :param max_loops: number of polling iterations to run; None loops forever
        """
        iteration = 0
        while max_loops is None or iteration < max_loops:
            iteration += 1
            messages = self.sqs_handler.receive_messages_from_queue(
                self.query_job_q_url)

            if not messages:
                logger.info(f"No messages to read from {self.query_job_q_url}")
                continue

            message = messages[0]
            logger.info(f"Received {message} from {self.query_job_q_url}")

            payload = json.loads(message['Body'])
            request_id = payload['request_id']
            request_tracker = RequestTracker(request_id)
            Logging.set_correlation_id(logger, value=request_id)
            obj_key = payload['s3_obj_key']
            query_type = payload['type']
            receipt_handle = message['ReceiptHandle']

            try:
                logger.info(f"Fetching query from {obj_key}")
                query_text = self.s3_handler.load_content_from_obj_key(obj_key)

                logger.info(f"Running query from {obj_key}")
                self.redshift_handler.transaction([query_text], read_only=True)
                logger.info(f"Finished running query from {obj_key}")

                logger.info(f"Deleting {message} from {self.query_job_q_url}")
                self.sqs_handler.delete_message_from_queue(
                    self.query_job_q_url, receipt_handle)

                if query_type == QueryType.CELL.value:
                    cached_key = request_tracker.lookup_cached_result()
                    if cached_key:
                        # Reuse the cached matrix; skip subtask accounting and conversion.
                        results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])
                        results_bucket.copy_obj(cached_key,
                                                request_tracker.s3_results_key)
                        continue

                logger.info("Incrementing completed queries in state table")
                request_tracker.complete_subtask_execution(Subtask.QUERY)

                if request_tracker.is_request_ready_for_conversion():
                    logger.info("Scheduling batch conversion job")
                    batch_job_id = self.batch_handler.schedule_matrix_conversion(
                        request_id, request_tracker.format,
                        request_tracker.s3_results_key)
                    request_tracker.write_batch_job_id_to_db(batch_job_id)
            except Exception as e:
                logger.info(f"QueryRunner failed on {message} with error {e}")
                request_tracker.log_error(str(e))
                logger.error(traceback.format_exc())

                # Park the payload on the deadletter queue, then drop it from the job queue.
                logger.info(
                    f"Adding {message} to {self.query_job_deadletter_q_url}")
                self.sqs_handler.add_message_to_queue(
                    self.query_job_deadletter_q_url, payload)
                logger.info(f"Deleting {message} from {self.query_job_q_url}")
                self.sqs_handler.delete_message_from_queue(
                    self.query_job_q_url, receipt_handle)