Ejemplo n.º 1
0
    def test_timeout(self, mock_creation_date, mock_log_error):
        """Exercise `request_tracker.timeout` on either side of the 36 hour mark.

        A creation date 35h59m in the past must not count as timed out; one
        36h01m in the past must, and the error logger must be hit exactly once.
        """
        # Creation date just inside the allowed window: no timeout expected.
        recent_creation = date.get_datetime_now() - timedelta(hours=35, minutes=59)
        mock_creation_date.return_value = date.to_string(recent_creation)
        self.assertFalse(self.request_tracker.timeout)

        # Creation date just outside the allowed window: timeout expected.
        stale_creation = date.get_datetime_now() - timedelta(hours=36, minutes=1)
        mock_creation_date.return_value = date.to_string(stale_creation)
        self.assertTrue(self.request_tracker.timeout)
        mock_log_error.assert_called_once()
Ejemplo n.º 2
0
    def create_data_version_table_entry(self, version: int):
        """
        Write an item describing a Redshift data version to the Data Version table.

        The item stores the version number, the creation timestamp, the cell counts
        returned by the V1 API handler for "project.provenance.document_id" and the
        supported metadata schema versions.
        If the new version already exists, it will be overwritten by the new entry.

        :param version: Version number to create
        """
        cell_counts_by_project = V1ApiHandler().describe_filter(
            "project.provenance.document_id")['cell_counts']

        # Re-key the supported schema versions by the `.value` of each schema name.
        schema_versions = {
            schema.value: SUPPORTED_METADATA_SCHEMA_VERSIONS[schema]
            for schema in SUPPORTED_METADATA_SCHEMA_VERSIONS
        }

        data_version_table = self._get_dynamo_table_resource_from_enum(
            DynamoTable.DATA_VERSION_TABLE)
        data_version_table.put_item(
            Item={
                DataVersionTableField.DATA_VERSION.value: version,
                DataVersionTableField.CREATION_DATE.value:
                    date.get_datetime_now(as_string=True),
                DataVersionTableField.PROJECT_CELL_COUNTS.value:
                    cell_counts_by_project,
                DataVersionTableField.METADATA_SCHEMA_VERSIONS.value:
                    schema_versions,
            })
Ejemplo n.º 3
0
    def run(self):
        """
        Convert the results of a matrix request to the requested format and upload them.

        Parses the expression, cell and gene manifests, dispatches to the
        `_to_<format>` method matching `self.format`, uploads the converted file to
        `self.target_path`, deletes the local copy, and finally marks the converter
        subtask and the request itself as complete on the request tracker.

        :raises Exception: Any failure is logged, recorded via
            `request_tracker.log_error` and re-raised to the caller.
        """
        try:
            LOGGER.debug(
                f"Beginning matrix conversion run for {self.args.request_id}")
            self.expression_manifest = self._parse_manifest(
                self.args.expression_manifest_key)
            self.cell_manifest = self._parse_manifest(
                self.args.cell_metadata_manifest_key)
            self.gene_manifest = self._parse_manifest(
                self.args.gene_metadata_manifest_key)

            LOGGER.debug(f"Beginning conversion to {self.format}")
            # Dispatch by name: format "foo" is handled by self._to_foo().
            local_converted_path = getattr(self, f"_to_{self.format}")()
            LOGGER.debug(f"Conversion to {self.format} completed")

            LOGGER.debug("Beginning upload to S3")
            try:
                self._upload_converted_matrix(local_converted_path,
                                              self.target_path)
                LOGGER.debug("Upload to S3 complete, job finished")
            finally:
                # Clean up the local copy even when the upload fails, so a failed
                # run does not leave the converted matrix behind on disk.
                os.remove(local_converted_path)

            self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
            # Request duration is measured from the tracked creation date until now.
            self.request_tracker.complete_request(
                duration=(date.get_datetime_now() -
                          date.to_datetime(self.request_tracker.creation_date)
                          ).total_seconds())
        except Exception as e:
            LOGGER.info(
                f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}"
            )
            self.request_tracker.log_error(str(e))
            # Bare raise re-raises the active exception with its traceback untouched.
            raise
Ejemplo n.º 4
0
    def create_request_table_entry(
            self,
            request_id: str,
            fmt: str,
            metadata_fields: list = DEFAULT_FIELDS,
            feature: str = DEFAULT_FEATURE,
            genus_species: GenusSpecies = GenusSpecies.HUMAN):
        """
        Create the Request table item that tracks the inputs, task execution progress
        and errors of a Matrix Request.

        The item is stamped with the current data version of the deployment named by
        the DEPLOYMENT_STAGE environment variable, and all progress counters start
        from their initial values.

        :param request_id: UUID identifying a matrix service request.
        :param fmt: User requested output file format of final expression matrix.
        :param metadata_fields: User requested metadata fields to include in the expression matrix.
        :param feature: User requested feature type of final expression matrix (gene|transcript).
        :param genus_species: Genus/species stored with the request (its `.value` is written).
        """
        deployment_item = self.get_table_item(
            table=DynamoTable.DEPLOYMENT_TABLE,
            key=os.environ['DEPLOYMENT_STAGE'])
        current_data_version = deployment_item[
            DeploymentTableField.CURRENT_DATA_VERSION.value]

        request_table = self._get_dynamo_table_resource_from_enum(
            DynamoTable.REQUEST_TABLE)

        field = RequestTableField
        new_request_item = {
            # Identity and inputs of the request.
            field.REQUEST_ID.value: request_id,
            field.REQUEST_HASH.value: "N/A",
            field.DATA_VERSION.value: current_data_version,
            field.CREATION_DATE.value: date.get_datetime_now(as_string=True),
            field.GENUS_SPECIES.value: genus_species.value,
            field.FORMAT.value: fmt,
            field.METADATA_FIELDS.value: metadata_fields,
            field.FEATURE.value: feature,
            # Initial progress counters.
            field.NUM_BUNDLES.value: -1,
            field.ROW_COUNT.value: 0,
            field.EXPECTED_DRIVER_EXECUTIONS.value: 1,
            field.COMPLETED_DRIVER_EXECUTIONS.value: 0,
            field.EXPECTED_QUERY_EXECUTIONS.value: 3,
            field.COMPLETED_QUERY_EXECUTIONS.value: 0,
            field.EXPECTED_CONVERTER_EXECUTIONS.value: 1,
            field.COMPLETED_CONVERTER_EXECUTIONS.value: 0,
            field.BATCH_JOB_ID.value: "N/A",
            field.ERROR_MESSAGE.value: 0,
        }
        request_table.put_item(Item=new_request_item)
Ejemplo n.º 5
0
def _log_error(entity: str, exception: Exception, trace: str,
               extractor: DSSExtractor):
    """
    Logs an ETL error and exception stack trace to a file.
    Error messages and exceptions are appended to 'errors.txt'.
    The failed entity is appended to 'failed_transforms.txt'.
    Both files are located in the MetadataToPsvTransformer.LOG_DIRNAME directory
    under `extractor.sd`.
    :param entity: A MetadataToPsvTransformer, or a bundle FQID for CellExpressionTransformer errors
    :param exception: Thrown exception string
    :param trace: Exception stack trace
    :param extractor: DSSExtractor
    """
    # Every logging argument needs a matching %-placeholder in the message;
    # without one the record cannot be formatted and the message is lost.
    logger.error("Failed to transform %s. %s", entity, exception)

    timestamp = date.get_datetime_now(as_string=True)
    log_file_path = os.path.join(extractor.sd,
                                 MetadataToPsvTransformer.LOG_DIRNAME,
                                 'errors.txt')
    with open(log_file_path, 'a+') as fh:
        fh.write(
            f"[{timestamp}] {entity} failed with exception: {exception}\n{trace}\n"
        )

    # Keep a plain list of failed entities, one per line.
    ft_file_path = os.path.join(extractor.sd,
                                MetadataToPsvTransformer.LOG_DIRNAME,
                                'failed_transforms.txt')
    with open(ft_file_path, 'a+') as fh:
        fh.write(f"{entity}\n")
Ejemplo n.º 6
0
 def timeout(self) -> bool:
     """
     Whether or not the request was created more than 36 hours ago.

     When the request has timed out, an explanatory error is recorded through
     `self.log_error` before returning.
     :return: bool
     """
     # Single source of truth for the threshold, so the comparison and the
     # user-facing message cannot drift apart.
     timeout_hours = 36
     timeout = date.to_datetime(
         self.creation_date) < date.get_datetime_now() - timedelta(hours=timeout_hours)
     if timeout:
         self.log_error(
             f"This request has timed out after {timeout_hours} hours. "
             "Please try again by resubmitting the POST request.")
     return timeout
Ejemplo n.º 7
0
    def test_get_matrix_processing__post_driver(self, mock_is_request_complete, mock_get_table_item, mock_initialized):
        """Expect an OK response with IN_PROGRESS status while the request is not complete."""
        mock_initialized.return_value = True
        mock_is_request_complete.return_value = False
        # Table item with an empty error message and a fresh creation date.
        request_item = {
            RequestTableField.ERROR_MESSAGE.value: "",
            RequestTableField.FORMAT.value: "test_format",
            RequestTableField.CREATION_DATE.value: get_datetime_now(as_string=True),
        }
        mock_get_table_item.return_value = request_item

        response = get_matrix(str(uuid.uuid4()))

        self.assertEqual(response[1], requests.codes.ok)
        self.assertEqual(response[0]['status'], MatrixRequestStatus.IN_PROGRESS.value)
Ejemplo n.º 8
0
    def test_is_expired(self, mock_exists, mock_creation_date, mock_log_error):
        """Cover `is_expired` for an expired request and two non-expired variants.

        Expired: the matrix does not exist and the creation date is over 30 days old.
        Not expired: the matrix does not exist but the creation date is 29 days old.
        Not expired: the matrix exists.
        """
        with self.subTest("Expired"):
            mock_exists.return_value = False
            past_expiration = date.get_datetime_now() - timedelta(days=30, minutes=1)
            mock_creation_date.return_value = date.to_string(past_expiration)

            self.assertTrue(self.request_tracker.is_expired)
            mock_log_error.assert_called_once()
            # Clear the recorded call so the following sub-tests start clean.
            mock_log_error.reset_mock()

        with self.subTest(
                "Not expired. Matrix DNE but not past expiration date"):
            mock_exists.return_value = False
            before_expiration = date.get_datetime_now() - timedelta(days=29)
            mock_creation_date.return_value = date.to_string(before_expiration)

            self.assertFalse(self.request_tracker.is_expired)
            mock_log_error.assert_not_called()

        with self.subTest("Not expired. Matrix exists"):
            mock_exists.return_value = True

            self.assertFalse(self.request_tracker.is_expired)
            mock_log_error.assert_not_called()
Ejemplo n.º 9
0
    def is_expired(self):
        """
        Report whether the request has expired, i.e. it was created more than 30 days
        ago and its results key no longer exists in the results bucket.
        When it has expired, an explanatory error is recorded through `log_error`.
        :return: bool
        """
        results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])

        created_at = date.to_datetime(self.creation_date)
        expiration_cutoff = date.get_datetime_now() - timedelta(days=30)
        created_before_cutoff = created_at < expiration_cutoff

        # The bucket is always queried, regardless of the request's age.
        matrix_missing = not results_bucket.exists(self.s3_results_key)
        expired = matrix_missing and created_before_cutoff

        if expired:
            self.log_error(
                "This request has expired after 30 days and is no longer available for download. "
                "A new matrix can be generated by resubmitting the POST request to /v1/matrix."
            )

        return expired
Ejemplo n.º 10
0
    def run(self):
        """
        Convert the query results of a matrix request to the requested format and upload them.

        Builds a results reader per query type (cell, expression, feature). When the
        cell results are empty, an empty output file is created instead of running a
        conversion; otherwise the `_to_<format>` method matching `self.format` is
        called. The resulting file is uploaded to `self.target_path`, the local copy
        is deleted, and the converter subtask and the request are marked complete.

        :raises Exception: Any failure is logged, recorded via
            `request_tracker.log_error` and re-raised to the caller.
        """
        try:
            LOGGER.debug(
                f"Beginning matrix conversion run for {self.args.request_id}")
            self.query_results = {
                QueryType.CELL:
                CellQueryResultsReader(self.args.cell_metadata_manifest_key),
                QueryType.EXPRESSION:
                ExpressionQueryResultsReader(
                    self.args.expression_manifest_key),
                QueryType.FEATURE:
                FeatureQueryResultsReader(self.args.gene_metadata_manifest_key)
            }

            if self.query_results[QueryType.CELL].is_empty:
                LOGGER.debug(
                    "Short-circuiting conversion because there are no cells.")
                # No cells: upload an empty placeholder file instead of converting.
                pathlib.Path(self.local_output_filename).touch()
                local_converted_path = self.local_output_filename
            else:
                LOGGER.debug(f"Beginning conversion to {self.format}")
                # Dispatch by name: format "foo" is handled by self._to_foo().
                local_converted_path = getattr(self, f"_to_{self.format}")()
                LOGGER.debug(f"Conversion to {self.format} completed")

            LOGGER.debug("Beginning upload to S3")
            try:
                self._upload_converted_matrix(local_converted_path,
                                              self.target_path)
                LOGGER.debug("Upload to S3 complete, job finished")
            finally:
                # Clean up the local copy even when the upload fails, so a failed
                # run does not leave the converted matrix behind on disk.
                os.remove(local_converted_path)

            self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
            # Request duration is measured from the tracked creation date until now.
            self.request_tracker.complete_request(
                duration=(date.get_datetime_now() -
                          date.to_datetime(self.request_tracker.creation_date)
                          ).total_seconds())
        except Exception as e:
            LOGGER.info(
                f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}"
            )
            self.request_tracker.log_error(str(e))
            # Bare raise re-raises the active exception with its traceback untouched.
            raise
Ejemplo n.º 11
0
    def test_get_matrix_no_cells(self, mock_is_request_complete,
                                 mock_get_table_item, mock_batch_job_status,
                                 mock_s3_size):
        """Expect a COMPLETE status with an empty matrix URL when the mocked size is 0.

        The request is reported as complete and the batch job status as
        "SUCCEEDED"; presumably `mock_s3_size` stands in for the size of the
        result object in S3 (verify against the patch decorators).
        """
        mock_is_request_complete.return_value = True
        mock_batch_job_status.return_value = "SUCCEEDED"
        mock_s3_size.return_value = 0

        field = RequestTableField
        mock_get_table_item.return_value = {
            field.DATA_VERSION.value: 0,
            field.ERROR_MESSAGE.value: "",
            field.FORMAT.value: "test_format",
            field.GENUS_SPECIES.value: GenusSpecies.HUMAN.value,
            field.CREATION_DATE.value: get_datetime_now(as_string=True),
        }

        response = core.get_matrix(str(uuid.uuid4()))

        self.assertEqual(response[1], requests.codes.ok)
        self.assertEqual(response[0]['status'],
                         MatrixRequestStatus.COMPLETE.value)
        self.assertEqual(response[0]['matrix_url'], "")
Ejemplo n.º 12
0
 def test_get_datetime_now(self):
     """Check that `date.get_datetime_now()` is at most one second past `self.now`."""
     date_now = date.get_datetime_now()
     # assertLessEqual reports both operands on failure, unlike assertTrue on a
     # pre-evaluated comparison.
     self.assertLessEqual((date_now - self.now).total_seconds(), 1)
Ejemplo n.º 13
0
 def setup(self):
     """Store the current time (as a string) on the instance and print it."""
     now_as_string = date.get_datetime_now(as_string=True)
     self.timestamp = now_as_string
     print(f"Running test with timestamp {self.timestamp}")