Example #1
0
def build_queues(environments: Set[str]):
    """Writes the task queue manifest to `queue.yaml`.

    Entries are emitted in this order: the scraper phase queue, one queue for
    each vendor that has queue params, one queue for each supported region
    that does not use a shared queue and whose environment is listed in
    |environments|, and finally the fixed direct ingest and BigQuery queue
    configs.

    Args:
        environments: Environments whose regions should get their own queue.
    """
    manifest_entries: List[Dict[str, Any]] = [{
        'name': queues.SCRAPER_PHASE_QUEUE,
        **BASE_QUEUE_CONFIG, 'rate': '1/s'
    }]

    # Vendor queues: vendors with no queue params get no queue.
    for vendor in vendors.get_vendors():
        vendor_params = vendors.get_vendor_queue_params(vendor)
        if vendor_params is not None:
            vendor_slug = vendor.replace('_', '-')
            manifest_entries.append({
                'name': f'vendor-{vendor_slug}-scraper',
                **BASE_QUEUE_CONFIG,
                **vendor_params
            })

    # Region queues: only for regions with a dedicated queue in one of the
    # requested environments. Region-level settings override the base config.
    for region in regions.get_supported_regions():
        if not region.shared_queue and region.environment in environments:
            manifest_entries.append({
                'name': region.get_queue_name(),
                **BASE_QUEUE_CONFIG,
                **(region.queue or {})
            })

    manifest_entries.extend([
        DIRECT_INGEST_JAILS_TASK_QUEUE_CONFIG,
        DIRECT_INGEST_STATE_TASK_QUEUE_CONFIG,
        DIRECT_INGEST_SCHEDULER_QUEUE_CONFIG,
        BIGQUERY_QUEUE_CONFIG,
    ])

    with open('queue.yaml', 'w') as manifest_file:
        yaml.dump({'queue': manifest_entries},
                  manifest_file,
                  default_flow_style=False,
                  Dumper=NoAliasDumper)
Example #2
0
    def test_initialize_queues(self):
        """Checks the queues reported by get_updated_queues() after a call to
        initialize_queues() under the 'my-project-id' project override.

        Every updated queue must live under the expected project/location
        path and have a logging sampling ratio of 1.0; the direct ingest
        queues must allow a single concurrent dispatch; and each supported
        region plus the fixed bigquery / job-monitor / scraper-phase queues
        must be present.
        """
        # Act
        with metadata.local_project_id_override('my-project-id'):
            google_cloud_task_queue_config.initialize_queues(
                google_auth_token='fake-auth-token')

        # Assert
        queues_updated_by_id = self.get_updated_queues()
        expected_prefix = 'projects/my-project-id/locations/us-east1/queues/'
        for queue in queues_updated_by_id.values():
            self.assertTrue(
                queue.name.startswith(expected_prefix),
                f'Queue name [{queue.name}] does not start with '
                f'[{expected_prefix}]')
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio,
                             1.0)

        direct_ingest_queue_ids = {
            'direct-ingest-state-process-job-queue-v2',
            'direct-ingest-jpp-process-job-queue-v2',
            'direct-ingest-bq-import-export-v2', 'direct-ingest-scheduler-v2'
        }
        missing_queue_ids = direct_ingest_queue_ids.difference(
            queues_updated_by_id.keys())
        self.assertFalse(
            missing_queue_ids,
            f'Direct ingest queues not updated: {sorted(missing_queue_ids)}')

        # Direct ingest queues must dispatch one task at a time.
        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        for region in regions.get_supported_regions():
            self.assertIn(region.get_queue_name(), queues_updated_by_id)

        self.assertIn('bigquery-v2', queues_updated_by_id)
        self.assertIn('job-monitor-v2', queues_updated_by_id)
        self.assertIn('scraper-phase-v2', queues_updated_by_id)
    def test_initialize_queues(self):
        """Checks the queues passed to `update_queue` on the mock client by
        initialize_queues() for project 'my-project-id'.

        Every updated queue must live under the expected project/location
        path and have a logging sampling ratio of 1.0; the direct ingest
        queues must allow a single concurrent dispatch; and each supported
        region plus the fixed bigquery / job-monitor / scraper-phase queues
        must be present.
        """
        # Act
        google_cloud_task_queue_config.initialize_queues(
            google_auth_token='fake-auth-token', project_id='my-project-id')

        # Assert
        # Index each queue handed to update_queue() by its short id, i.e. the
        # last path component of the queue name.
        queues_updated_by_id: Dict[str, queue_pb2.Queue] = {}
        for method_name, args, _kwargs in self.mock_client.mock_calls:
            if method_name == 'update_queue':
                queue = args[0]
                if not isinstance(queue, queue_pb2.Queue):
                    self.fail(f"Unexpected type [{type(queue)}]")
                _, queue_id = os.path.split(queue.name)
                queues_updated_by_id[queue_id] = queue

        expected_prefix = 'projects/my-project-id/locations/us-east1/queues/'
        for queue in queues_updated_by_id.values():
            self.assertTrue(
                queue.name.startswith(expected_prefix),
                f'Queue name [{queue.name}] does not start with '
                f'[{expected_prefix}]')
            self.assertEqual(queue.stackdriver_logging_config.sampling_ratio,
                             1.0)

        direct_ingest_queue_ids = {
            'direct-ingest-state-process-job-queue-v2',
            'direct-ingest-jpp-process-job-queue-v2',
            'direct-ingest-scheduler-v2'
        }
        missing_queue_ids = direct_ingest_queue_ids.difference(
            queues_updated_by_id.keys())
        self.assertFalse(
            missing_queue_ids,
            f'Direct ingest queues not updated: {sorted(missing_queue_ids)}')

        # Direct ingest queues must dispatch one task at a time.
        for queue_id in direct_ingest_queue_ids:
            queue = queues_updated_by_id[queue_id]
            self.assertEqual(queue.rate_limits.max_concurrent_dispatches, 1)

        for region in regions.get_supported_regions():
            self.assertIn(region.get_queue_name(), queues_updated_by_id)

        self.assertIn('bigquery-v2', queues_updated_by_id)
        self.assertIn('job-monitor-v2', queues_updated_by_id)
        self.assertIn('scraper-phase-v2', queues_updated_by_id)
def _build_cloud_task_queue_configs(
        client_wrapper: GoogleCloudTasksClientWrapper
) -> List[queue_pb2.Queue]:
    """Assembles the Queue protos for every Google Cloud Tasks queue that
    should be deployed in this environment.

    The returned list holds, in order: the direct ingest queues, the scraper
    phase queue (limited to one dispatch per second), the BigQuery and job
    monitor queues, one queue per vendor that has queue params, and one queue
    per supported region that has its own (non-shared) queue and for which
    `is_ingest_launched_in_env()` is true.
    """

    # Direct ingest queues all share the same base config.
    direct_ingest_queue_names = [
        DIRECT_INGEST_STATE_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_JAILS_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_BQ_IMPORT_EXPORT_QUEUE_V2,
        DIRECT_INGEST_SCHEDULER_QUEUE_V2,
    ]
    queue_configs = [
        _queue_config_with_name(client_wrapper,
                                DIRECT_INGEST_QUEUE_BASE_CONFIG,
                                direct_ingest_name)
        for direct_ingest_name in direct_ingest_queue_names
    ]

    # Scraper phase queue: base config plus an explicit rate limit.
    scraper_phase_builder = ProtobufBuilder(queue_pb2.Queue).compose(
        _queue_config_with_name(client_wrapper, SCRAPER_PHASE_QUEUE_CONFIG,
                                SCRAPER_PHASE_QUEUE_V2))
    scraper_phase_builder = scraper_phase_builder.update_args(
        rate_limits=queue_pb2.RateLimits(max_dispatches_per_second=1))
    queue_configs.append(scraper_phase_builder.build())

    queue_configs.append(
        _queue_config_with_name(client_wrapper, BIGQUERY_QUEUE_CONFIG,
                                BIGQUERY_QUEUE_V2))
    queue_configs.append(
        _queue_config_with_name(client_wrapper, JOB_MONITOR_QUEUE_CONFIG,
                                JOB_MONITOR_QUEUE_V2))

    # Vendor queues: scraper base config overlaid with the vendor's params.
    for vendor in vendors.get_vendors():
        vendor_params = vendors.get_vendor_queue_params(vendor)
        if vendor_params is not None:
            vendor_slug = vendor.replace('_', '-')
            vendor_base = _queue_config_with_name(
                client_wrapper, BASE_SCRAPER_QUEUE_CONFIG,
                f'vendor-{vendor_slug}-scraper-v2')
            vendor_overrides = queue_pb2.Queue(**vendor_params)
            queue_configs.append(
                ProtobufBuilder(queue_pb2.Queue).compose(vendor_base).compose(
                    vendor_overrides).build())

    # Region queues: only regions with a dedicated queue that are launched
    # in this environment; region-level settings are overlaid when present.
    for region in regions.get_supported_regions():
        if not region.shared_queue and region.is_ingest_launched_in_env():
            region_config = _queue_config_with_name(
                client_wrapper, BASE_SCRAPER_QUEUE_CONFIG,
                region.get_queue_name())
            if region.queue:
                region_builder = ProtobufBuilder(
                    queue_pb2.Queue).compose(region_config)
                region_config = region_builder.compose(
                    queue_pb2.Queue(**region.queue)).build()
            queue_configs.append(region_config)

    return queue_configs