Beispiel #1
0
    def create_direct_ingest_process_job_task(self, region: Region,
                                              ingest_args: IngestArgs):
        """Builds a process-job task for |region| and queues it.

        The task name is derived from the region's queue name, its region code
        and the task id tag of |ingest_args|. The request targets the
        `/direct/process_job` endpoint for the region and carries the body
        built from |ingest_args|, serialized as encoded JSON. The finished task
        is handed to `self._queue_task` on the region's queue.
        """
        body = self._get_body_from_args(ingest_args)

        task_name = self._build_task_name_for_queue_and_region(
            region.get_queue_name(),
            region.region_code,
            ingest_args.task_id_tag(),
        )

        # Assemble the App Engine request piece by piece before building the
        # task itself.
        relative_uri = f'/direct/process_job?region={region.region_code}'
        encoded_body = json.dumps(body).encode()
        http_request = {'relative_uri': relative_uri, 'body': encoded_body}

        task = tasks.types.Task(
            name=task_name,
            app_engine_http_request=http_request,
        )
        self._queue_task(region.get_queue_name(), task)
def check_is_region_launched_in_env(region: Region) -> None:
    """Raises if direct ingest is not launched for |region| in this GAE env.

    Returns silently when |region| reports that ingest is launched in the
    current environment. Otherwise logs an error and raises a
    DirectIngestError with error type ENVIRONMENT_ERROR.
    """
    if region.is_ingest_launched_in_env():
        return

    current_env = environment.get_gae_environment()
    message = f'Bad environment [{current_env}] for region [{region.region_code}].'
    logging.error(message)
    raise DirectIngestError(
        msg=message,
        error_type=DirectIngestErrorType.ENVIRONMENT_ERROR,
    )
Beispiel #3
0
 def _get_process_job_queue_manager(
     self, region: Region
 ) -> CloudTaskQueueManager[ProcessIngestJobCloudTaskQueueInfo]:
     """Returns the process-job queue manager for |region|.

     Managers are kept in `self.region_process_job_queue_managers`, keyed by
     region code. A manager is created for the region's queue the first time
     that region code is requested and reused afterwards.
     """
     managers = self.region_process_job_queue_managers
     code = region.region_code

     if code not in managers:
         new_manager = CloudTaskQueueManager(
             queue_info_cls=ProcessIngestJobCloudTaskQueueInfo,
             queue_name=region.get_queue_name(),
         )
         managers[code] = new_manager

     return managers[code]
Beispiel #4
0
def mock_region(region_code, queue_name=None, is_stoppable=False):
    """Builds a Region populated with fixed placeholder values.

    Only the region code, shared queue and stoppable flag vary. Falsy values
    of |queue_name| and |is_stoppable| are normalized to None and False.
    """
    shared_queue = queue_name if queue_name else None
    stoppable = is_stoppable if is_stoppable else False

    # Settings that are the same for every region built here.
    fixed_settings = {
        'agency_name': 'the agency',
        'agency_type': 'benevolent',
        'base_url': 'localhost:3000',
        'names_file': 'names.txt',
        'timezone': 'America/New_York',
        'environment': 'production',
        'jurisdiction_id': 'jurisdiction_id',
    }

    return Region(
        region_code=region_code,
        shared_queue=shared_queue,
        is_stoppable=stoppable,
        **fixed_settings,
    )
def _mock_region():
    """Builds a Region for _REGION_CODE with fixed placeholder settings."""
    settings = dict(
        region_code=_REGION_CODE,
        shared_queue='queue',
        agency_name='the agency',
        agency_type='benevolent',
        base_url='localhost:3000',
        names_file='names.txt',
        timezone='America/Chicago',
        environment='production',
        jurisdiction_id='01071001',
        is_stoppable=False,
    )
    return Region(**settings)
def _mock_region():
    """Builds a Region for _REGION_CODE with fixed placeholder settings."""
    region_settings = {
        "region_code": _REGION_CODE,
        "shared_queue": "queue",
        "agency_name": "the agency",
        "agency_type": "benevolent",
        "base_url": "localhost:3000",
        "names_file": "names.txt",
        "timezone": "America/Chicago",
        "environment": "production",
        "jurisdiction_id": "01071001",
        "is_stoppable": False,
    }
    region = Region(**region_settings)
    return region
Beispiel #7
0
def mock_region(region_code, queue_name=None, is_stoppable=False):
    """Builds a Region populated with fixed placeholder values.

    Only the region code, shared queue and stoppable flag vary. Falsy values
    of |queue_name| and |is_stoppable| are normalized to None and False.
    """
    if queue_name:
        shared_queue = queue_name
    else:
        shared_queue = None

    if is_stoppable:
        stoppable = is_stoppable
    else:
        stoppable = False

    region = Region(
        region_code=region_code,
        shared_queue=shared_queue,
        agency_name="the agency",
        agency_type="benevolent",
        base_url="localhost:3000",
        names_file="names.txt",
        timezone="America/New_York",
        environment="production",
        # must be 8 character numeric string
        jurisdiction_id="00000000",
        is_stoppable=stoppable,
    )
    return region
    def create_direct_ingest_process_job_task(self, region: Region,
                                              ingest_args: IngestArgs):
        """Creates a process-job cloud task for |region|.

        The task id is built from the region code and the task id tag of
        |ingest_args|. The task is created on the region's queue through
        `self.cloud_task_client`, targets the `/direct/process_job` endpoint
        for the region, and carries the body built from |ingest_args|.
        """
        task_id = _build_task_id(
            region.region_code,
            ingest_args.task_id_tag(),
            prefix_only=False,
        )
        request_body = self._get_body_from_args(ingest_args)

        task_kwargs = {
            'task_id': task_id,
            'queue_name': region.get_queue_name(),
            'relative_uri': f'/direct/process_job?region={region.region_code}',
            'body': request_body,
        }
        self.cloud_task_client.create_task(**task_kwargs)
Beispiel #9
0
def run_scraper_for_region(region: regions.Region, args: argparse.Namespace) -> None:
    """Runs the scraper for the given region

    Creates and manages an in-memory FIFO queue to replicate production.

    Args:
        region: Region whose scraper is fetched via `region.get_scraper()`.
        args: Parsed options. This function reads `num_tasks`, `run_forever`,
            `lifo`, `fail_fast` and `sleep_between_requests`.

    Raises:
        Exception: Re-raises whatever a task raised when `args.fail_fast` is
            set. Otherwise the traceback is printed and the run continues.
    """

    # Don't initialize a ScraperCloudTaskManager when running locally.
    scraper_module.ScraperCloudTaskManager = lambda: None  # type: ignore
    scraper = region.get_scraper()
    scraper.BATCH_WRITES = False
    task_queue: deque = deque()

    # We use this to bind the method to the instance.
    scraper.add_task = types.MethodType(partial(add_task, task_queue), scraper)
    scraper.start_scrape = types.MethodType(partial(start_scrape, task_queue), scraper)

    scraper.start_scrape(constants.ScrapeType.BACKGROUND)

    num_tasks_run = 0
    while task_queue and (num_tasks_run < args.num_tasks or args.run_forever):
        logging.info("***")
        logging.info(
            "Running task [%d] of [%s] tasks",
            num_tasks_run,
            "infinite" if args.run_forever else args.num_tasks,
        )

        # run the task: newest first when --lifo is set, oldest first otherwise
        if args.lifo:
            method, request = task_queue.pop()
        else:
            method, request = task_queue.popleft()
        try:
            getattr(scraper, method)(request)
        except Exception:
            # No explicit KeyboardInterrupt check is needed here:
            # KeyboardInterrupt derives from BaseException rather than
            # Exception, so Ctrl-C is never caught by this handler and always
            # propagates. An identity test such as `e is KeyboardInterrupt`
            # compares an instance to a class and can never be true.
            if args.fail_fast:
                raise
            traceback.print_exc()

        # increment and sleep
        num_tasks_run += 1
        logging.info(
            "Sleeping [%s] seconds before sending another request",
            args.sleep_between_requests,
        )
        time.sleep(args.sleep_between_requests)

    logging.info("Completed the test run!")
Beispiel #10
0
def fake_region(*,
                region_code: str = 'us_ca',
                agency_type: str = 'prison',
                environment: str = 'local',
                jurisdiction_id: str = 'unknown',
                ingestor: Optional[Union[BaseScraper,
                                         BaseDirectIngestController]] = None):
    """Builds an autospec'd Region mock carrying the given attribute values.

    `get_ingestor` returns |ingestor| when one is supplied (and truthy), or an
    autospec of BaseDirectIngestController otherwise.
    `is_ingest_launched_in_env` returns whatever the real Region
    implementation computes when run against the configured mock.
    """
    fake = create_autospec(Region)
    fake.region_code = region_code
    fake.agency_type = agency_type
    fake.environment = environment
    fake.jurisdiction_id = jurisdiction_id

    if ingestor:
        resolved_ingestor = ingestor
    else:
        resolved_ingestor = create_autospec(BaseDirectIngestController)
    fake.get_ingestor.return_value = resolved_ingestor

    # Computed last so the real method sees the attributes assigned above.
    launched = Region.is_ingest_launched_in_env(fake)
    fake.is_ingest_launched_in_env.return_value = launched
    return fake
    def make_sql_preprocessing_flag_region(
            raw_vs_ingest_file_name_differentiation_enabled_env: Optional[
                str] = None,
            raw_data_bq_imports_enabled_env: Optional[str] = None,
            ingest_view_exports_enabled_env: Optional[str] = None):
        """Builds the 'us_mo' direct ingest Region with flag values overridden.

        Starts from the manifest returned by `get_region_manifest` for 'us_mo'
        and replaces the three `*_enabled_env` entries with the arguments
        passed in. Arguments left as None still override the manifest value.
        """
        region_code = 'us_mo'

        # Later keys win, so the explicit flag values replace any manifest
        # entries with the same name.
        region_kwargs = {
            **get_region_manifest(region_code, True),
            'raw_vs_ingest_file_name_differentiation_enabled_env':
            raw_vs_ingest_file_name_differentiation_enabled_env,
            'raw_data_bq_imports_enabled_env': raw_data_bq_imports_enabled_env,
            'ingest_view_exports_enabled_env': ingest_view_exports_enabled_env,
        }

        return Region(region_code=region_code,
                      is_direct_ingest=True,
                      **region_kwargs)
Beispiel #12
0
    def make_sql_preprocessing_flag_region(
        raw_vs_ingest_file_name_differentiation_enabled_env: Optional[str] = None,
        raw_data_bq_imports_enabled_env: Optional[str] = None,
        ingest_view_exports_enabled_env: Optional[str] = None,
    ):
        """Builds the "us_mo" direct ingest Region with flag values overridden.

        Starts from the manifest returned by `get_region_manifest` for "us_mo"
        and replaces the three `*_enabled_env` entries with the arguments
        passed in. Arguments left as None still override the manifest value.
        """
        region_code = "us_mo"

        # Copy the manifest, then overlay the explicit flag values on top.
        region_kwargs = {
            **get_region_manifest(region_code, direct_ingest_regions_module)
        }
        region_kwargs.update(
            raw_vs_ingest_file_name_differentiation_enabled_env=raw_vs_ingest_file_name_differentiation_enabled_env,
            raw_data_bq_imports_enabled_env=raw_data_bq_imports_enabled_env,
            ingest_view_exports_enabled_env=ingest_view_exports_enabled_env,
        )

        region = Region(region_code=region_code, is_direct_ingest=True, **region_kwargs)
        return region
def fake_region(*,
                region_code: str = 'us_xx',
                agency_type: str = 'prison',
                environment: str = 'local',
                jurisdiction_id: str = 'unknown',
                ingestor: Optional[Union[BaseScraper,
                                         BaseDirectIngestController]] = None,
                is_raw_vs_ingest_file_name_detection_enabled: bool = False,
                are_raw_data_bq_imports_enabled_in_env: bool = False,
                are_ingest_view_exports_enabled_in_env: bool = False):
    """Builds an autospec'd Region mock carrying the given attribute values.

    `get_ingestor` returns |ingestor| when one is supplied (and truthy), or an
    autospec of BaseDirectIngestController otherwise.
    `is_ingest_launched_in_env` returns whatever the real Region
    implementation computes when run against the configured mock. The three
    feature-flag methods return the corresponding boolean arguments.
    """
    fake = create_autospec(Region)
    fake.region_code = region_code
    fake.agency_type = agency_type
    fake.environment = environment
    fake.jurisdiction_id = jurisdiction_id

    if ingestor:
        resolved_ingestor = ingestor
    else:
        resolved_ingestor = create_autospec(BaseDirectIngestController)
    fake.get_ingestor.return_value = resolved_ingestor

    # Computed after the attributes above so the real method sees them.
    launched = Region.is_ingest_launched_in_env(fake)
    fake.is_ingest_launched_in_env.return_value = launched

    # Each flag method on the mock simply echoes the matching argument.
    flag_results = (
        (fake.is_raw_vs_ingest_file_name_detection_enabled,
         is_raw_vs_ingest_file_name_detection_enabled),
        (fake.are_raw_data_bq_imports_enabled_in_env,
         are_raw_data_bq_imports_enabled_in_env),
        (fake.are_ingest_view_exports_enabled_in_env,
         are_ingest_view_exports_enabled_in_env),
    )
    for flag_method, flag_value in flag_results:
        flag_method.return_value = flag_value

    return fake
Beispiel #14
0
 def fake_is_launched_in_env():
     """Returns the real Region.is_ingest_launched_in_env result for `region`.

     `region` is taken from the surrounding scope rather than passed in.
     """
     is_launched = Region.is_ingest_launched_in_env(region)
     return is_launched
Beispiel #15
0
 def get_process_job_queue_info(self, region: Region) -> CloudTaskQueueInfo:
     """Returns the queue info for |region|'s queue.

     Delegates to `self._get_queue_info` with the region's queue name and
     region code.
     """
     queue_name = region.get_queue_name()
     queue_info = self._get_queue_info(queue_name, region.region_code)
     return queue_info
Beispiel #16
0
def get_parent_class(region: regions.Region) -> type:
    """Returns the second entry in the MRO of the region's ingestor class.

    The first MRO entry is the ingestor class itself, so the entry at index 1
    is the next class in its method resolution order.
    """
    ingestor_cls = region.get_ingestor_class()
    resolution_order = inspect.getmro(ingestor_cls)
    return resolution_order[1]