def test_get_bool_param_value(self) -> None:
    """Stored boolean params are returned as-is, overriding the given default."""
    # Each case passes the opposite of the expected value as the default, so
    # a lookup that wrongly fell back to the default would fail the check.
    cases = (
        ("false_bool_param", True, False),
        ("true_bool_param", False, True),
    )
    for key, default, expected in cases:
        actual = params.get_bool_param_value(key, PARAMS, default=default)
        self.assertEqual(actual, expected)
def test_get_bool_param_value_malformed(self) -> None:
    """Malformed and empty boolean params both raise ValueError."""
    for bad_key in ("malformed_bool_param", "empty_bool_param"):
        with self.assertRaises(ValueError):
            params.get_bool_param_value(bad_key, PARAMS, default=False)
def handle_new_files() -> Tuple[str, HTTPStatus]:
    """Endpoint that asks a region's ingest controller to process new files.

    Per the original documentation, the controller normalizes and splits
    files in the region's ingest bucket as appropriate, and schedules the
    next process_job task when no renaming / splitting work was done that
    would itself trigger further calls to this endpoint.

    Request parameters (read from ``request.values``):
        region: code of the region to handle files for (required).
        can_start_ingest: boolean flag forwarded to the controller;
            defaults to False.

    Returns:
        ``('', 200)`` on success, or a message with 400 when the parameters
        are unusable or the controller lookup fails with a bad-request
        DirectIngestError.

    Raises:
        DirectIngestError: when the controller lookup fails for any other
            reason, or the controller is not a GcsfsDirectIngestController.
    """
    logging.info(
        'Received request for direct ingest handle_new_files: %s',
        request.values,
    )

    region_code = get_str_param_value('region', request.values)
    can_start_ingest = get_bool_param_value(
        'can_start_ingest', request.values, default=False
    )

    params_ok = bool(region_code) and can_start_ingest is not None
    if not params_ok:
        return f'Bad parameters [{request.values}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        try:
            ingest_controller = controller_for_region_code(
                region_code, allow_unlaunched=True
            )
        except DirectIngestError as error:
            # Only bad-request failures become a 400; the rest propagate.
            if not error.is_bad_request():
                raise error
            return str(error), HTTPStatus.BAD_REQUEST

        if not isinstance(ingest_controller, GcsfsDirectIngestController):
            raise DirectIngestError(
                msg=f"Unexpected controller type [{type(ingest_controller)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR,
            )

        ingest_controller.handle_new_files(can_start_ingest=can_start_ingest)

    return '', HTTPStatus.OK
def handle_direct_ingest_file() -> Tuple[str, HTTPStatus]:
    """Called from a Cloud Function when a new file is added to a direct
    ingest bucket.

    Per the original documentation, this triggers a job that deals with
    normalizing and splitting the file as is appropriate, then starts the
    scheduler if allowed.

    Request parameters (read from ``request.args``):
        region: code of the region the file belongs to (required).
        bucket: name of the bucket holding the file (required).
        relative_file_path: path of the file relative to the bucket, read
            with its case preserved (required).
        start_ingest: boolean flag forwarded to the controller; defaults
            to False.

    Returns:
        ``('', 200)`` on success, including when the resolved path is not a
        file path (nothing is handled in that case). A message with 400 is
        returned when the parameters are unusable or the controller lookup
        fails with a bad-request DirectIngestError.

    Raises:
        DirectIngestError: when the controller lookup fails for any other
            reason, or the controller is not a GcsfsDirectIngestController.
    """
    region_code = get_str_param_value('region', request.args)
    # The bucket name for the file to ingest
    bucket = get_str_param_value('bucket', request.args)

    # The relative path to the file, not including the bucket name
    relative_file_path = get_str_param_value('relative_file_path',
                                             request.args, preserve_case=True)
    start_ingest = get_bool_param_value(
        'start_ingest', request.args, default=False)

    if not region_code or not bucket \
            or not relative_file_path or start_ingest is None:
        return f'Bad parameters [{request.args}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        try:
            controller = controller_for_region_code(region_code,
                                                    allow_unlaunched=True)
        except DirectIngestError as e:
            # A bad-request lookup failure (as handled by the sibling
            # endpoints) is the caller's error: answer 400 rather than
            # letting it surface as an unhandled exception.
            if e.is_bad_request():
                return str(e), HTTPStatus.BAD_REQUEST
            raise e

        if not isinstance(controller, GcsfsDirectIngestController):
            raise DirectIngestError(
                msg=f"Unexpected controller type [{type(controller)}].",
                error_type=DirectIngestErrorType.INPUT_ERROR)

        path = GcsfsPath.from_bucket_and_blob_name(
            bucket_name=bucket, blob_name=relative_file_path)

        # Only file paths are handed to the controller; any other path kind
        # is acknowledged without further work.
        if isinstance(path, GcsfsFilePath):
            controller.handle_file(path, start_ingest=start_ingest)

    return '', HTTPStatus.OK
def scheduler() -> Tuple[str, HTTPStatus]:
    """Endpoint that asks an ingest controller to schedule its next job.

    Request parameters:
        region: region code, read from ``request.values`` (required).
        just_finished_job: boolean read from ``request.values`` and passed
            to the controller; defaults to False.
        bucket: name of the ingest bucket to schedule work out of, read
            from ``request.args`` (required).

    Returns:
        ``("", 200)`` on success, or a logged message with 400 when the
        parameters are unusable or building the controller fails with a
        bad-request DirectIngestError.

    Raises:
        DirectIngestError: when building the controller fails for any
            other reason.
    """
    logging.info(
        "Received request for direct ingest scheduler: %s", request.values
    )

    region_code = get_str_param_value("region", request.values)
    just_finished_job = get_bool_param_value(
        "just_finished_job", request.values, default=False
    )
    # Name of the bucket whose ingest instance should have work scheduled.
    bucket = get_str_param_value("bucket", request.args)

    params_ok = (
        bool(region_code) and just_finished_job is not None and bool(bucket)
    )
    if not params_ok:
        message = f"Bad parameters [{request.values}]"
        logging.error(message)
        return message, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket)
    instance_tag = DirectIngestInstance.for_ingest_bucket(bucket_path).value

    with monitoring.push_region_tag(region_code, ingest_instance=instance_tag):
        try:
            ingest_controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=False,
            )
        except DirectIngestError as error:
            # Only bad-request failures become a 400; the rest propagate.
            if not error.is_bad_request():
                raise error
            logging.error(str(error))
            return str(error), HTTPStatus.BAD_REQUEST

        ingest_controller.schedule_next_ingest_job(just_finished_job)

    return "", HTTPStatus.OK
def handle_validation_request():
    """API endpoint that runs data validation and reports the failures.

    The optional ``should_update_views`` request argument (default False)
    is forwarded to ``execute_validation``. The failures it returns are
    rendered with ``_readable_response`` and sent back with a 200 status.
    """
    update_views = get_bool_param_value(
        'should_update_views', request.args, default=False
    )
    failures = execute_validation(should_update_views=update_views)
    body = _readable_response(failures)
    return body, HTTPStatus.OK
def handle_direct_ingest_file() -> Tuple[str, HTTPStatus]:
    """Endpoint invoked by a Cloud Function when a file lands in an ingest
    bucket.

    Per the original documentation, this triggers a job that normalizes and
    splits the file as appropriate, then starts the scheduler if allowed.

    Request parameters (read from ``request.args``):
        region: code of the region the file belongs to (required).
        bucket: name of the bucket holding the file (required).
        relative_file_path: path of the file relative to the bucket, read
            with its case preserved (required).
        start_ingest: boolean flag forwarded to the controller; defaults
            to False.

    Returns:
        ``("", 200)`` on success, including when the resolved path is not a
        file path. A logged message with 400 is returned when the
        parameters are unusable or building the controller fails with a
        bad-request DirectIngestError.

    Raises:
        DirectIngestError: when building the controller fails for any
            other reason.
    """
    region_code = get_str_param_value("region", request.args)
    # Name of the bucket that received the file.
    bucket = get_str_param_value("bucket", request.args)
    # Location of the file inside the bucket, bucket name excluded.
    relative_file_path = get_str_param_value(
        "relative_file_path", request.args, preserve_case=True
    )
    start_ingest = get_bool_param_value(
        "start_ingest", request.args, default=False
    )

    required_present = region_code and bucket and relative_file_path
    if not required_present or start_ingest is None:
        message = f"Bad parameters [{request.args}]"
        logging.error(message)
        return message, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)
    instance_tag = DirectIngestInstance.for_ingest_bucket(bucket_path).value

    with monitoring.push_region_tag(region_code, ingest_instance=instance_tag):
        try:
            ingest_controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as error:
            # Only bad-request failures become a 400; the rest propagate.
            if not error.is_bad_request():
                raise error
            logging.error(str(error))
            return str(error), HTTPStatus.BAD_REQUEST

        file_path = GcsfsPath.from_bucket_and_blob_name(
            bucket_name=bucket, blob_name=relative_file_path
        )
        # Paths that are not file paths are acknowledged without any work.
        if isinstance(file_path, GcsfsFilePath):
            ingest_controller.handle_file(file_path, start_ingest=start_ingest)

    return "", HTTPStatus.OK
def scheduler() -> Tuple[str, HTTPStatus]:
    """Endpoint that asks a region's controller to schedule its next job.

    Request parameters (read from ``request.values``):
        region: code of the region to schedule work for (required).
        just_finished_job: boolean passed to the controller; defaults to
            False.

    Returns:
        ``('', 200)`` on success, or a message with 400 when the parameters
        are unusable or the controller lookup fails with a bad-request
        DirectIngestError.

    Raises:
        DirectIngestError: when the controller lookup fails for any other
            reason.
    """
    logging.info(
        'Received request for direct ingest scheduler: %s', request.values
    )

    region_code = get_str_param_value('region', request.values)
    just_finished_job = get_bool_param_value(
        'just_finished_job', request.values, default=False
    )

    params_ok = bool(region_code) and just_finished_job is not None
    if not params_ok:
        return f'Bad parameters [{request.values}]', HTTPStatus.BAD_REQUEST

    with monitoring.push_region_tag(region_code):
        try:
            ingest_controller = controller_for_region_code(region_code)
        except DirectIngestError as error:
            # Only bad-request failures become a 400; the rest propagate.
            if not error.is_bad_request():
                raise error
            return str(error), HTTPStatus.BAD_REQUEST

        ingest_controller.schedule_next_ingest_job_or_wait_if_necessary(
            just_finished_job
        )

    return '', HTTPStatus.OK
def handle_new_files() -> Tuple[str, HTTPStatus]:
    """Endpoint that asks an ingest bucket's controller to process new files.

    Per the original documentation, the controller normalizes and splits
    files in the ingest bucket as appropriate, and schedules the next
    process_job task when no renaming / splitting work was done that would
    itself trigger further calls to this endpoint.

    Request parameters (read from ``request.values``):
        region: code of the region to handle files for (required).
        can_start_ingest: boolean flag forwarded to the controller;
            defaults to False.
        bucket: name of the ingest bucket to operate on (required).

    Returns:
        ``("", 200)`` on success, or a logged message with 400 when the
        parameters are unusable or building the controller fails with a
        bad-request DirectIngestError.

    Raises:
        DirectIngestError: when building the controller fails for any
            other reason.
    """
    logging.info(
        "Received request for direct ingest handle_new_files: %s",
        request.values,
    )

    region_code = get_str_param_value("region", request.values)
    can_start_ingest = get_bool_param_value(
        "can_start_ingest", request.values, default=False
    )
    bucket = get_str_param_value("bucket", request.values)

    params_ok = (
        bool(region_code) and can_start_ingest is not None and bool(bucket)
    )
    if not params_ok:
        message = f"Bad parameters [{request.values}]"
        logging.error(message)
        return message, HTTPStatus.BAD_REQUEST

    bucket_path = GcsfsBucketPath(bucket_name=bucket)
    instance_tag = DirectIngestInstance.for_ingest_bucket(bucket_path).value

    with monitoring.push_region_tag(region_code, ingest_instance=instance_tag):
        try:
            ingest_controller = DirectIngestControllerFactory.build(
                ingest_bucket_path=bucket_path,
                allow_unlaunched=True,
            )
        except DirectIngestError as error:
            # Only bad-request failures become a 400; the rest propagate.
            if not error.is_bad_request():
                raise error
            logging.error(str(error))
            return str(error), HTTPStatus.BAD_REQUEST

        ingest_controller.handle_new_files(can_start_ingest=can_start_ingest)

    return "", HTTPStatus.OK
def test_get_bool_param_value_default(self):
    """Looking up 'foo' returns the supplied default, for both True and False."""
    for fallback in (True, False):
        actual = params.get_bool_param_value('foo', PARAMS, default=fallback)
        self.assertEqual(actual, fallback)
def test_get_bool_param_value_malformed(self):
    """Malformed and empty boolean params both raise ValueError."""
    for bad_key in ('malformed_bool_param', 'empty_bool_param'):
        with self.assertRaises(ValueError):
            params.get_bool_param_value(bad_key, PARAMS)
def test_get_bool_param_value_no_default(self):
    """Looking up 'foo' without passing a default compares equal to None."""
    actual = params.get_bool_param_value('foo', PARAMS)
    self.assertEqual(actual, None)
def test_get_bool_param_value(self):
    """Stored boolean params come back as the matching bool value."""
    cases = (
        ('false_bool_param', False),
        ('true_bool_param', True),
    )
    for key, expected in cases:
        actual = params.get_bool_param_value(key, PARAMS)
        self.assertEqual(actual, expected)