Beispiel #1
0
 def test_get_bool_param_value(self) -> None:
     self.assertEqual(
         params.get_bool_param_value("false_bool_param",
                                     PARAMS,
                                     default=True), False)
     self.assertEqual(
         params.get_bool_param_value("true_bool_param",
                                     PARAMS,
                                     default=False), True)
Beispiel #2
0
    def test_get_bool_param_value_malformed(self) -> None:
        with self.assertRaises(ValueError):
            params.get_bool_param_value("malformed_bool_param",
                                        PARAMS,
                                        default=False)

        with self.assertRaises(ValueError):
            params.get_bool_param_value("empty_bool_param",
                                        PARAMS,
                                        default=False)
Beispiel #3
0
def handle_new_files() -> Tuple[str, HTTPStatus]:
    """Normalizes and splits files in the ingest bucket for a given region as
    is appropriate. Will schedule the next process_job task if no renaming /
    splitting work has been done that will trigger subsequent calls to this
    endpoint.
    """
    logging.info('Received request for direct ingest handle_new_files: %s',
                 request.values)
    region_code = get_str_param_value('region', request.values)
    can_start_ingest = \
        get_bool_param_value('can_start_ingest', request.values, default=False)

    if not region_code or can_start_ingest is None:
        return f'Bad parameters [{request.values}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        try:
            controller = controller_for_region_code(region_code,
                                                    allow_unlaunched=True)
        except DirectIngestError as e:
            if e.is_bad_request():
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        if not isinstance(controller, GcsfsDirectIngestController):
            raise DirectIngestError(
                msg=f"Unexpected controller type [{type(controller)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR)

        controller.handle_new_files(can_start_ingest=can_start_ingest)
    return '', HTTPStatus.OK
Beispiel #4
0
def handle_direct_ingest_file() -> Tuple[str, HTTPStatus]:
    """Called from a Cloud Function when a new file is added to a direct ingest
    bucket. Will trigger a job that deals with normalizing and splitting the
    file as is appropriate, then start the scheduler if allowed.
    """
    region_code = get_str_param_value('region', request.args)
    # The bucket name for the file to ingest
    bucket = get_str_param_value('bucket', request.args)
    # The relative path to the file, not including the bucket name
    relative_file_path = get_str_param_value('relative_file_path',
                                             request.args,
                                             preserve_case=True)
    start_ingest = \
        get_bool_param_value('start_ingest', request.args, default=False)

    if not region_code or not bucket \
            or not relative_file_path or start_ingest is None:
        return f'Bad parameters [{request.args}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        controller = controller_for_region_code(region_code,
                                                allow_unlaunched=True)
        if not isinstance(controller, GcsfsDirectIngestController):
            raise DirectIngestError(
                msg=f"Unexpected controller type [{type(controller)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR)

        path = GcsfsPath.from_bucket_and_blob_name(
            bucket_name=bucket, blob_name=relative_file_path)

        if isinstance(path, GcsfsFilePath):
            controller.handle_file(path, start_ingest=start_ingest)

    return '', HTTPStatus.OK
Beispiel #5
0
def scheduler() -> Tuple[str, HTTPStatus]:
    """Checks the state of the ingest instance and schedules any tasks to be run."""
    logging.info("Received request for direct ingest scheduler: %s",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    just_finished_job = get_bool_param_value("just_finished_job",
                                             request.values,
                                             default=False)

    # The bucket name for ingest instance to schedule work out of
    bucket = get_str_param_value("bucket", request.args)

    if not region_code or just_finished_job is None or not bucket:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path, allow_unlaunched=False)
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        controller.schedule_next_ingest_job(just_finished_job)
    return "", HTTPStatus.OK
Beispiel #6
0
def handle_validation_request():
    """API endpoint to service data validation requests."""
    should_update_views = get_bool_param_value('should_update_views',
                                               request.args,
                                               default=False)
    failed_validations = execute_validation(
        should_update_views=should_update_views)

    return _readable_response(failed_validations), HTTPStatus.OK
Beispiel #7
0
def handle_direct_ingest_file() -> Tuple[str, HTTPStatus]:
    """Called from a Cloud Function when a new file is added to a direct ingest
    bucket. Will trigger a job that deals with normalizing and splitting the
    file as is appropriate, then start the scheduler if allowed.
    """
    region_code = get_str_param_value("region", request.args)
    # The bucket name for the file to ingest
    bucket = get_str_param_value("bucket", request.args)
    # The relative path to the file, not including the bucket name
    relative_file_path = get_str_param_value("relative_file_path",
                                             request.args,
                                             preserve_case=True)
    start_ingest = get_bool_param_value("start_ingest",
                                        request.args,
                                        default=False)

    if not region_code or not bucket or not relative_file_path or start_ingest is None:
        response = f"Bad parameters [{request.args}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        path = GcsfsPath.from_bucket_and_blob_name(
            bucket_name=bucket, blob_name=relative_file_path)

        if isinstance(path, GcsfsFilePath):
            controller.handle_file(path, start_ingest=start_ingest)

    return "", HTTPStatus.OK
Beispiel #8
0
def scheduler() -> Tuple[str, HTTPStatus]:
    logging.info('Received request for direct ingest scheduler: %s',
                 request.values)
    region_code = get_str_param_value('region', request.values)
    just_finished_job = \
        get_bool_param_value('just_finished_job', request.values, default=False)

    if not region_code or just_finished_job is None:
        return f'Bad parameters [{request.values}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        try:
            controller = controller_for_region_code(region_code)
        except DirectIngestError as e:
            if e.is_bad_request():
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        controller.schedule_next_ingest_job_or_wait_if_necessary(
            just_finished_job)
    return '', HTTPStatus.OK
Beispiel #9
0
def handle_new_files() -> Tuple[str, HTTPStatus]:
    """Normalizes and splits files in the ingest bucket for a given region as
    is appropriate. Will schedule the next process_job task if no renaming /
    splitting work has been done that will trigger subsequent calls to this
    endpoint.
    """
    logging.info("Received request for direct ingest handle_new_files: %s",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    can_start_ingest = get_bool_param_value("can_start_ingest",
                                            request.values,
                                            default=False)
    bucket = get_str_param_value("bucket", request.values)

    if not region_code or can_start_ingest is None or not bucket:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        controller.handle_new_files(can_start_ingest=can_start_ingest)
    return "", HTTPStatus.OK
Beispiel #10
0
 def test_get_bool_param_value_default(self):
     self.assertEqual(
         params.get_bool_param_value('foo', PARAMS, default=True), True)
     self.assertEqual(
         params.get_bool_param_value('foo', PARAMS, default=False), False)
Beispiel #11
0
    def test_get_bool_param_value_malformed(self):
        with self.assertRaises(ValueError):
            params.get_bool_param_value('malformed_bool_param', PARAMS)

        with self.assertRaises(ValueError):
            params.get_bool_param_value('empty_bool_param', PARAMS)
Beispiel #12
0
 def test_get_bool_param_value_no_default(self):
     self.assertEqual(params.get_bool_param_value('foo', PARAMS), None)
Beispiel #13
0
 def test_get_bool_param_value(self):
     self.assertEqual(
         params.get_bool_param_value('false_bool_param', PARAMS), False)
     self.assertEqual(
         params.get_bool_param_value('true_bool_param', PARAMS), True)