Example #1
    def _acquire_ingest_lock() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        try:
            lock_manager.acquire_lock()
        except GCSPseudoLockAlreadyExists:
            return "lock already exists", HTTPStatus.CONFLICT

        if not lock_manager.can_proceed():
            try:
                lock_manager.release_lock()
            except Exception as e:
                logging.exception(e)
            return (
                "other locks blocking ingest have been acquired; releasing lock",
                HTTPStatus.CONFLICT,
            )

        return "", HTTPStatus.OK
Example #2
def scheduler() -> Tuple[str, HTTPStatus]:
    """Checks the state of the ingest instance and schedules any tasks to be run."""
    logging.info("Received request for direct ingest scheduler: %s",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    just_finished_job = get_bool_param_value("just_finished_job",
                                             request.values,
                                             default=False)

    # The bucket name for the ingest instance to schedule work out of
    bucket = get_str_param_value("bucket", request.args)

    if not region_code or just_finished_job is None or not bucket:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path, allow_unlaunched=False)
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        controller.schedule_next_ingest_job(just_finished_job)
    return "", HTTPStatus.OK
Example #3
def _get_database_name_for_state(state_code: StateCode,
                                 instance: DirectIngestInstance) -> str:
    """Returns the database name for the given state and instance"""
    return SQLAlchemyDatabaseKey.for_state_code(
        state_code,
        instance.database_version(SystemLevel.STATE,
                                  state_code=state_code),
    ).db_name
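A hypothetical call site for the helper above; `StateCode.US_XX` is a placeholder member used purely for illustration:

```
# Hypothetical usage: resolve the database backing the PRIMARY instance for a state.
db_name = _get_database_name_for_state(StateCode.US_XX, DirectIngestInstance.PRIMARY)
```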
Example #4
    def __init__(self, ingest_bucket_path: GcsfsBucketPath) -> None:
        """Initialize the controller."""
        self.cloud_task_manager = DirectIngestCloudTaskManagerImpl()
        self.ingest_instance = DirectIngestInstance.for_ingest_bucket(
            ingest_bucket_path)
        self.region_lock_manager = DirectIngestRegionLockManager.for_direct_ingest(
            region_code=self.region.region_code,
            schema_type=self.system_level.schema_type(),
            ingest_instance=self.ingest_instance,
        )
        self.fs = DirectIngestGCSFileSystem(GcsfsFactory.build())
        self.ingest_bucket_path = ingest_bucket_path
        self.storage_directory_path = (
            gcsfs_direct_ingest_storage_directory_path_for_region(
                region_code=self.region_code(),
                system_level=self.system_level,
                ingest_instance=self.ingest_instance,
            ))

        self.temp_output_directory_path = (
            gcsfs_direct_ingest_temporary_output_directory_path())

        self.file_prioritizer = GcsfsDirectIngestJobPrioritizer(
            self.fs,
            self.ingest_bucket_path,
            self.get_file_tag_rank_list(),
        )

        self.ingest_file_split_line_limit = self._INGEST_FILE_SPLIT_LINE_LIMIT

        self.file_metadata_manager = PostgresDirectIngestFileMetadataManager(
            region_code=self.region.region_code,
            ingest_database_name=self.ingest_database_key.db_name,
        )

        self.raw_file_import_manager = DirectIngestRawFileImportManager(
            region=self.region,
            fs=self.fs,
            ingest_bucket_path=self.ingest_bucket_path,
            temp_output_directory_path=self.temp_output_directory_path,
            big_query_client=BigQueryClientImpl(),
        )

        self.ingest_view_export_manager = DirectIngestIngestViewExportManager(
            region=self.region,
            fs=self.fs,
            output_bucket_name=self.ingest_bucket_path.bucket_name,
            file_metadata_manager=self.file_metadata_manager,
            big_query_client=BigQueryClientImpl(),
            view_collector=DirectIngestPreProcessedIngestViewCollector(
                self.region, self.get_file_tag_rank_list()),
            launched_file_tags=self.get_file_tag_rank_list(),
        )

        self.ingest_instance_status_manager = DirectIngestInstanceStatusManager(
            self.region_code(), self.ingest_instance)
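The constructor above is not normally called directly; the handlers in the other examples reach it through `DirectIngestControllerFactory.build`. A hedged sketch of that path, reusing only calls that appear elsewhere in these examples (the region code and project id are placeholders):

```
# Hypothetical wiring: resolve the PRIMARY ingest bucket for a region, then build a controller.
bucket_path = gcsfs_direct_ingest_bucket_for_region(
    region_code="us_xx",
    system_level=SystemLevel.STATE,
    ingest_instance=DirectIngestInstance.PRIMARY,
    project_id="recidiviz-456",
)
controller = DirectIngestControllerFactory.build(
    ingest_bucket_path=bucket_path, allow_unlaunched=True
)
controller.handle_new_files(can_start_ingest=False)
```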
Example #5
    def _export_database_to_gcs() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"].upper())
            db_version = ingest_instance.database_version(
                system_level=SystemLevel.STATE, state_code=state_code)
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance)
        if not lock_manager.can_proceed():
            return (
                "other locks blocking ingest have been acquired; aborting operation",
                HTTPStatus.CONFLICT,
            )

        db_key = SQLAlchemyDatabaseKey.for_state_code(state_code, db_version)
        cloud_sql_client = CloudSQLClientImpl(project_id=project_id)

        operation_id = cloud_sql_client.export_to_gcs_sql(
            db_key,
            GcsfsFilePath.from_absolute_path(
                f"{STATE_INGEST_EXPORT_URI}/{db_version.value}/{state_code.value}"
            ),
        )
        if operation_id is None:
            return (
                "Cloud SQL export operation was not started successfully.",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id, seconds_to_wait=GCS_IMPORT_EXPORT_TIMEOUT_SEC)
        if not operation_succeeded:
            return (
                "Cloud SQL import did not complete within 60 seconds",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return operation_id, HTTPStatus.OK
Example #6
    def test_from_county_ingest_bucket(self) -> None:
        ingest_bucket_path = gcsfs_direct_ingest_bucket_for_region(
            region_code="us_xx_yyyyy",
            system_level=SystemLevel.COUNTY,
            ingest_instance=DirectIngestInstance.PRIMARY,
            project_id="recidiviz-456",
        )

        self.assertEqual(
            DirectIngestInstance.PRIMARY,
            DirectIngestInstance.for_ingest_bucket(ingest_bucket_path),
        )
Example #7
    def test_direct_ingest_instance_status_contains_data_for_all_states(
            self) -> None:
        '''Enforces that after all migrations the set of direct ingest instance statuses
        matches the list of known states.

        If this test fails, you will likely have to add a new migration because a new state
        was recently created. To do so, first run:
        ```
        python -m recidiviz.tools.migrations.autogenerate_migration \
            --database OPERATIONS \
            --message add_us_xx
        ```

        This will generate a blank migration. You should then modify the migration, changing
        the `upgrade` method to look like:
        ```
        def upgrade() -> None:
            op.execute("""
                INSERT INTO direct_ingest_instance_status (region_code, instance, is_paused) VALUES
                ('US_XX', 'PRIMARY', TRUE),
                ('US_XX', 'SECONDARY', TRUE);
            """)
        ```

        Afterwards, this test should ideally pass.
        '''

        with runner(self.default_config(), self.engine) as r:
            r.migrate_up_to("head")

            engine = create_engine(
                local_postgres_helpers.postgres_db_url_from_env_vars())

            conn = engine.connect()
            rows = conn.execute(
                "SELECT region_code, instance FROM direct_ingest_instance_status;"
            )

            instance_to_state_codes = defaultdict(set)
            for row in rows:
                instance_to_state_codes[DirectIngestInstance(row[1])].add(
                    row[0])

            required_states = {
                name.upper()
                for name in get_existing_region_dir_names()
            }

            for instance in DirectIngestInstance:
                self.assertEqual(required_states,
                                 instance_to_state_codes[instance])
Example #8
    def is_task_queued(self, region: Region,
                       ingest_args: GcsfsIngestArgs) -> bool:
        """Returns true if the ingest_args correspond to a task currently in
        the queue.
        """

        task_id_prefix = _build_task_id(
            region.region_code,
            DirectIngestInstance.for_ingest_bucket(
                ingest_args.file_path.bucket_path),
            ingest_args.task_id_tag(),
            prefix_only=True,
        )

        return bool(next(self._tasks_for_prefix(task_id_prefix), None))
Example #9
    def test_from_state_ingest_bucket(self) -> None:
        ingest_bucket_path = gcsfs_direct_ingest_bucket_for_region(
            region_code="us_xx",
            system_level=SystemLevel.STATE,
            ingest_instance=DirectIngestInstance.PRIMARY,
            project_id="recidiviz-456",
        )

        self.assertEqual(
            DirectIngestInstance.PRIMARY,
            DirectIngestInstance.for_ingest_bucket(ingest_bucket_path),
        )

        ingest_bucket_path = gcsfs_direct_ingest_bucket_for_region(
            region_code="us_xx",
            system_level=SystemLevel.STATE,
            ingest_instance=DirectIngestInstance.SECONDARY,
            project_id="recidiviz-456",
        )

        self.assertEqual(
            DirectIngestInstance.SECONDARY,
            DirectIngestInstance.for_ingest_bucket(ingest_bucket_path),
        )
Example #10
    def _release_ingest_lock() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        lock_manager = DirectIngestRegionLockManager.for_state_ingest(
            state_code, ingest_instance=ingest_instance)
        try:
            lock_manager.release_lock()
        except GCSPseudoLockDoesNotExist:
            return "lock does not exist", HTTPStatus.NOT_FOUND

        return "", HTTPStatus.OK
Example #11
def handle_direct_ingest_file() -> Tuple[str, HTTPStatus]:
    """Called from a Cloud Function when a new file is added to a direct ingest
    bucket. Will trigger a job that deals with normalizing and splitting the
    file as is appropriate, then start the scheduler if allowed.
    """
    region_code = get_str_param_value("region", request.args)
    # The bucket name for the file to ingest
    bucket = get_str_param_value("bucket", request.args)
    # The relative path to the file, not including the bucket name
    relative_file_path = get_str_param_value("relative_file_path",
                                             request.args,
                                             preserve_case=True)
    start_ingest = get_bool_param_value("start_ingest",
                                        request.args,
                                        default=False)

    if not region_code or not bucket or not relative_file_path or start_ingest is None:
        response = f"Bad parameters [{request.args}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        path = GcsfsPath.from_bucket_and_blob_name(
            bucket_name=bucket, blob_name=relative_file_path)

        if isinstance(path, GcsfsFilePath):
            controller.handle_file(path, start_ingest=start_ingest)

    return "", HTTPStatus.OK
Example #12
    def _pause_direct_ingest_instance() -> Tuple[str, HTTPStatus]:
        try:
            state_code = StateCode(request.json["stateCode"])
            ingest_instance = DirectIngestInstance(
                request.json["ingestInstance"])
        except ValueError:
            return "invalid parameters provided", HTTPStatus.BAD_REQUEST

        ingest_status_manager = DirectIngestInstanceStatusManager(
            region_code=state_code.value, ingest_instance=ingest_instance)
        try:
            ingest_status_manager.pause_instance()
        except Exception:
            return (
                "something went wrong pausing the intance",
                HTTPStatus.INTERNAL_SERVER_ERROR,
            )

        return "", HTTPStatus.OK
Example #13
def main(state_code: StateCode, ingest_instance: DirectIngestInstance) -> None:
    """Executes the main flow of the script."""
    print(
        f"RUN THE FOLLOWING COMMANDS IN ORDER TO DELETE ALL DATA FOR REGION [{state_code.value}]"
    )
    print(
        "********************************************************************************"
    )
    db_version = ingest_instance.database_version(SystemLevel.STATE,
                                                  state_code)
    db_key = SQLAlchemyDatabaseKey.for_state_code(state_code=state_code,
                                                  db_version=db_version)

    # Connect to correct database for instance first
    print(f"\\c {db_key.db_name}")

    # Then run deletion commands
    for cmd in generate_region_deletion_commands(state_code, db_version):
        print(cmd)

    print(
        "********************************************************************************"
    )
    print("HOW TO PERFORM DELETION:")
    print(
        "1) Log into prod data client (`gcloud compute ssh prod-data-client --project=recidiviz-123`)"
    )
    print("\n> For production deletion:")
    print(
        "2) Go to secret manager to get login credentials stored in `state_db_user` and `state_db_password` secrets:"
        "\n\thttps://console.cloud.google.com/security/secret-manager?organizationId=448885369991&"
        "project=recidiviz-123")
    print("3) Log into postgres database (`prod-state-psql`)")
    print("\n> For staging deletion:")
    print(
        "2) Go to secret manager to get login credentials stored in `state_db_user` and `state_db_password` secrets:"
        "\n\thttps://console.cloud.google.com/security/secret-manager?organizationId=448885369991&"
        "project=recidiviz-staging")
    print("3) Log into postgres database (`dev-state-psql`)")
    print("\n> For all:")
    print(
        "4) Paste full list of commands listed above in postgres command line and run. Some commands may take a "
        "while to run.")
Example #14
def handle_new_files() -> Tuple[str, HTTPStatus]:
    """Normalizes and splits files in the ingest bucket for a given region as
    is appropriate. Will schedule the next process_job task if no renaming /
    splitting work has been done that will trigger subsequent calls to this
    endpoint.
    """
    logging.info("Received request for direct ingest handle_new_files: %s",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    can_start_ingest = get_bool_param_value("can_start_ingest",
                                            request.values,
                                            default=False)
    bucket = get_str_param_value("bucket", request.values)

    if not region_code or can_start_ingest is None or not bucket:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                bucket_path).value,
    ):
        try:
            controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as e:
            if e.is_bad_request():
                logging.error(str(e))
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        controller.handle_new_files(can_start_ingest=can_start_ingest)
    return "", HTTPStatus.OK
Example #15
def process_job() -> Tuple[str, HTTPStatus]:
    """Processes a single direct ingest file, specified in the provided ingest
    arguments.
    """
    logging.info("Received request to process direct ingest job: [%s]",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    file_path = get_str_param_value("file_path",
                                    request.values,
                                    preserve_case=True)

    if not region_code or not file_path:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    gcsfs_path = GcsfsFilePath.from_absolute_path(file_path)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                gcsfs_path.bucket_path).value,
    ):
        json_data = request.get_data(as_text=True)
        ingest_args = _parse_cloud_task_args(json_data)

        if not ingest_args:
            raise DirectIngestError(
                msg="process_job was called with no GcsfsIngestArgs.",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if not isinstance(ingest_args, GcsfsIngestArgs):
            raise DirectIngestError(
                msg=
                f"process_job was called with incorrect args type [{type(ingest_args)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if gcsfs_path != ingest_args.file_path:
            raise DirectIngestError(
                msg=f"Different paths were passed in the url and request body\n"
                f"url: {gcsfs_path.uri()}\n"
                f"body: {ingest_args.file_path.uri()}",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        with monitoring.push_tags(
            {TagKey.INGEST_TASK_TAG: ingest_args.task_id_tag()}):
            try:
                controller = DirectIngestControllerFactory.build(
                    ingest_bucket_path=ingest_args.file_path.bucket_path,
                    allow_unlaunched=False,
                )
            except DirectIngestError as e:
                if e.is_bad_request():
                    logging.error(str(e))
                    return str(e), HTTPStatus.BAD_REQUEST
                raise e

            try:
                controller.run_ingest_job_and_kick_scheduler_on_completion(
                    ingest_args)
            except GCSPseudoLockAlreadyExists as e:
                logging.warning(str(e))
                return str(e), HTTPStatus.CONFLICT
    return "", HTTPStatus.OK
Example #16
def main() -> None:
    """Runs the move_state_files_to_deprecated script."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--file-type",
        required=True,
        choices=[file_type.value for file_type in GcsfsDirectIngestFileType],
        help="Defines whether we should move raw files or generated ingest_view files",
    )

    parser.add_argument("--region", required=True, help="E.g. 'us_nd'")

    parser.add_argument(
        "--ingest-instance",
        required=True,
        choices=[instance.value for instance in DirectIngestInstance],
        help="Defines which ingest instance we should be deprecating files for.",
    )

    parser.add_argument(
        "--dry-run",
        default=True,
        type=str_to_bool,
        help="Runs move in dry-run mode, only prints the file moves it would do.",
    )

    parser.add_argument(
        "--start-date-bound",
        help="The lower bound date to start from, inclusive. For partial moving of ingested files. "
        "E.g. 2019-09-23.",
    )

    parser.add_argument(
        "--end-date-bound",
        help="The upper bound date to end at, inclusive. For partial moving of ingested files. "
        "E.g. 2019-09-23.",
    )

    parser.add_argument(
        "--project-id", help="The id for this particular project, E.g. 'recidiviz-123'"
    )

    parser.add_argument(
        "--file-filter",
        default=None,
        help="Regex name filter - when set, will only move files that match this regex.",
    )

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    MoveFilesToDeprecatedController(
        file_type=GcsfsDirectIngestFileType(args.file_type),
        region_code=args.region,
        ingest_instance=DirectIngestInstance(args.ingest_instance),
        start_date_bound=args.start_date_bound,
        end_date_bound=args.end_date_bound,
        project_id=args.project_id,
        dry_run=args.dry_run,
        file_filter=args.file_filter,
    ).run()
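A hypothetical direct invocation mirroring the CLI defaults above, bypassing argparse; the `"raw"` file-type value and the staging project id are assumptions, while the flag-to-argument mapping follows the parser definition:

```
# Hypothetical dry run over us_nd raw files in the PRIMARY ingest instance.
MoveFilesToDeprecatedController(
    file_type=GcsfsDirectIngestFileType("raw"),  # enum value assumed
    region_code="us_nd",
    ingest_instance=DirectIngestInstance.PRIMARY,
    start_date_bound=None,
    end_date_bound=None,
    project_id="recidiviz-staging",  # project id assumed
    dry_run=True,
    file_filter=None,
).run()
```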
Example #17
def raw_data_import() -> Tuple[str, HTTPStatus]:
    """Imports a single raw direct ingest CSV file from a location in GCS File System to its corresponding raw data
    table in BQ.
    """
    logging.info("Received request to do direct ingest raw data import: [%s]",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    file_path = get_str_param_value("file_path",
                                    request.values,
                                    preserve_case=True)

    if not region_code or not file_path:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    gcsfs_path = GcsfsFilePath.from_absolute_path(file_path)

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                gcsfs_path.bucket_path).value,
    ):
        json_data = request.get_data(as_text=True)
        data_import_args = _parse_cloud_task_args(json_data)

        if not data_import_args:
            raise DirectIngestError(
                msg=
                "raw_data_import was called with no GcsfsRawDataBQImportArgs.",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if not isinstance(data_import_args, GcsfsRawDataBQImportArgs):
            raise DirectIngestError(
                msg=
                f"raw_data_import was called with incorrect args type [{type(data_import_args)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if gcsfs_path != data_import_args.raw_data_file_path:
            raise DirectIngestError(
                msg=f"Different paths were passed in the url and request body\n"
                f"url: {gcsfs_path.uri()}\n"
                f"body: {data_import_args.raw_data_file_path.uri()}",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        with monitoring.push_tags(
            {TagKey.RAW_DATA_IMPORT_TAG: data_import_args.task_id_tag()}):
            try:
                controller = DirectIngestControllerFactory.build(
                    ingest_bucket_path=data_import_args.raw_data_file_path.
                    bucket_path,
                    allow_unlaunched=False,
                )
            except DirectIngestError as e:
                if e.is_bad_request():
                    logging.error(str(e))
                    return str(e), HTTPStatus.BAD_REQUEST
                raise e

            controller.do_raw_data_import(data_import_args)
    return "", HTTPStatus.OK
Example #18
def _is_legacy_instance(system_level: SystemLevel, region_code: str,
                        ingest_instance: DirectIngestInstance) -> bool:
    return (ingest_instance.database_version(system_level,
                                             StateCode.get(region_code)) is
            SQLAlchemyStateDatabaseVersion.LEGACY)
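A hypothetical check using the helper above; the region code is a placeholder:

```
# Hypothetical: branch on whether this instance still maps to the LEGACY database version.
if _is_legacy_instance(SystemLevel.STATE, "us_xx", DirectIngestInstance.PRIMARY):
    ...
```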
Example #19
def ingest_view_export() -> Tuple[str, HTTPStatus]:
    """Exports an ingest view from BQ to a file in the region's GCS File System ingest bucket that is ready to be
    processed and ingested into our Recidiviz DB.
    """
    logging.info("Received request to do direct ingest view export: [%s]",
                 request.values)
    region_code = get_str_param_value("region", request.values)
    output_bucket_name = get_str_param_value("output_bucket",
                                             request.values,
                                             preserve_case=True)

    if not region_code or not output_bucket_name:
        response = f"Bad parameters [{request.values}]"
        logging.error(response)
        return response, HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(
            region_code,
            ingest_instance=DirectIngestInstance.for_ingest_bucket(
                GcsfsBucketPath(output_bucket_name)).value,
    ):
        json_data = request.get_data(as_text=True)
        ingest_view_export_args = _parse_cloud_task_args(json_data)

        if not ingest_view_export_args:
            raise DirectIngestError(
                msg=
                "ingest_view_export was called with no GcsfsIngestViewExportArgs.",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if not isinstance(ingest_view_export_args, GcsfsIngestViewExportArgs):
            raise DirectIngestError(
                msg=
                f"ingest_view_export was called with incorrect args type [{type(ingest_view_export_args)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        if output_bucket_name != ingest_view_export_args.output_bucket_name:
            raise DirectIngestError(
                msg=
                f"Different buckets were passed in the url and request body\n"
                f"url: {output_bucket_name}\n"
                f"body: {ingest_view_export_args.output_bucket_name}",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        with monitoring.push_tags({
                TagKey.INGEST_VIEW_EXPORT_TAG:
                ingest_view_export_args.task_id_tag()
        }):
            try:
                controller = DirectIngestControllerFactory.build(
                    ingest_bucket_path=GcsfsBucketPath(
                        ingest_view_export_args.output_bucket_name),
                    allow_unlaunched=False,
                )
            except DirectIngestError as e:
                if e.is_bad_request():
                    logging.error(str(e))
                    return str(e), HTTPStatus.BAD_REQUEST
                raise e

            controller.do_ingest_view_export(ingest_view_export_args)
    return "", HTTPStatus.OK