def build_queues(environments: Set[str]):
    """Write the ``queue.yaml`` manifest describing every task queue that
    should exist for the given set of environments.

    Includes the scraper-phase queue, one queue per vendor that declares
    queue params, one queue per non-shared region whose environment is in
    *environments*, and the fixed direct-ingest / BigQuery queues.
    """
    # Scraper-phase queue goes first, rate-limited to one dispatch per second.
    queue_configs: List[Dict[str, Any]] = [
        {'name': queues.SCRAPER_PHASE_QUEUE, **BASE_QUEUE_CONFIG, 'rate': '1/s'},
    ]

    # One dedicated queue per vendor that defines its own queue parameters.
    for vendor in vendors.get_vendors():
        queue_params = vendors.get_vendor_queue_params(vendor)
        if queue_params is None:
            continue
        vendor_queue_name = 'vendor-{}-scraper'.format(vendor.replace('_', '-'))
        queue_configs.append(
            {'name': vendor_queue_name, **BASE_QUEUE_CONFIG, **queue_params})

    # Region-specific queues; regions on the shared queue or outside the
    # requested environments are skipped.
    for region in regions.get_supported_regions():
        if region.shared_queue or region.environment not in environments:
            continue
        queue_configs.append({
            'name': region.get_queue_name(),
            **BASE_QUEUE_CONFIG,
            **(region.queue or {}),
        })

    # Fixed system queues, appended last.
    queue_configs.extend([
        DIRECT_INGEST_JAILS_TASK_QUEUE_CONFIG,
        DIRECT_INGEST_STATE_TASK_QUEUE_CONFIG,
        DIRECT_INGEST_SCHEDULER_QUEUE_CONFIG,
        BIGQUERY_QUEUE_CONFIG,
    ])

    # NoAliasDumper keeps repeated dicts expanded instead of YAML anchors.
    with open('queue.yaml', 'w') as queue_manifest:
        yaml.dump({'queue': queue_configs}, queue_manifest,
                  default_flow_style=False, Dumper=NoAliasDumper)
def test_initialize_queues(self):
    """initialize_queues should update every expected queue with the
    expected project path, logging config, and rate limits."""
    # Act
    with metadata.local_project_id_override('my-project-id'):
        google_cloud_task_queue_config.initialize_queues(
            google_auth_token='fake-auth-token')

    # Assert
    queues_updated_by_id = self.get_updated_queues()

    # Every updated queue lives under the expected project/location and
    # has full stackdriver sampling enabled.
    expected_name_prefix = 'projects/my-project-id/locations/us-east1/queues/'
    for queue in queues_updated_by_id.values():
        self.assertTrue(queue.name.startswith(expected_name_prefix))
        self.assertEqual(queue.stackdriver_logging_config.sampling_ratio, 1.0)

    direct_ingest_queue_ids = {
        'direct-ingest-state-process-job-queue-v2',
        'direct-ingest-jpp-process-job-queue-v2',
        'direct-ingest-bq-import-export-v2',
        'direct-ingest-scheduler-v2',
    }
    # All direct-ingest queues were updated, each serialized (one task at a time).
    self.assertFalse(
        direct_ingest_queue_ids.difference(queues_updated_by_id.keys()))
    for queue_id in direct_ingest_queue_ids:
        updated_queue = queues_updated_by_id[queue_id]
        self.assertEqual(updated_queue.rate_limits.max_concurrent_dispatches, 1)

    # A queue exists for every supported region.
    for region in regions.get_supported_regions():
        self.assertTrue(region.get_queue_name() in queues_updated_by_id)

    # Shared system queues.
    self.assertTrue('bigquery-v2' in queues_updated_by_id)
    self.assertTrue('job-monitor-v2' in queues_updated_by_id)
    self.assertTrue('scraper-phase-v2' in queues_updated_by_id)
def test_initialize_queues(self):
    """initialize_queues should issue an update_queue call for every
    expected queue with the expected configuration."""
    # Act
    google_cloud_task_queue_config.initialize_queues(
        google_auth_token='fake-auth-token', project_id='my-project-id')

    # Assert — collect every Queue passed to update_queue, keyed by the
    # final path component of its resource name.
    queues_updated_by_id: Dict[str, queue_pb2.Queue] = {}
    for method_name, args, _kwargs in self.mock_client.mock_calls:
        if method_name != 'update_queue':
            continue
        queue = args[0]
        if not isinstance(queue, queue_pb2.Queue):
            self.fail(f"Unexpected type [{type(queue)}]")
        _, queue_id = os.path.split(queue.name)
        queues_updated_by_id[queue_id] = queue

    expected_name_prefix = 'projects/my-project-id/locations/us-east1/queues/'
    for queue in queues_updated_by_id.values():
        self.assertTrue(queue.name.startswith(expected_name_prefix))
        self.assertEqual(queue.stackdriver_logging_config.sampling_ratio, 1.0)

    # NOTE(review): 'direct-ingest-bq-import-export-v2' is not covered here
    # even though the config builder creates it — confirm whether that queue
    # still exists in this version and add it if so.
    direct_ingest_queue_ids = {
        'direct-ingest-state-process-job-queue-v2',
        'direct-ingest-jpp-process-job-queue-v2',
        'direct-ingest-scheduler-v2',
    }
    # All direct-ingest queues were updated, each serialized (one task at a time).
    self.assertFalse(
        direct_ingest_queue_ids.difference(queues_updated_by_id.keys()))
    for queue_id in direct_ingest_queue_ids:
        updated_queue = queues_updated_by_id[queue_id]
        self.assertEqual(updated_queue.rate_limits.max_concurrent_dispatches, 1)

    # A queue exists for every supported region.
    for region in regions.get_supported_regions():
        self.assertTrue(region.get_queue_name() in queues_updated_by_id)

    # Shared system queues.
    self.assertTrue('bigquery-v2' in queues_updated_by_id)
    self.assertTrue('job-monitor-v2' in queues_updated_by_id)
    self.assertTrue('scraper-phase-v2' in queues_updated_by_id)
def _build_cloud_task_queue_configs(
        client_wrapper: GoogleCloudTasksClientWrapper
) -> List[queue_pb2.Queue]:
    """Builds a list of configurations for all Google Cloud Tasks queues
    that should be deployed in this environment.
    """
    queues = []

    # Direct ingest queues for handling /process_job requests — all share
    # the same base config, differing only by name.
    direct_ingest_queue_names = [
        DIRECT_INGEST_STATE_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_JAILS_PROCESS_JOB_QUEUE_V2,
        DIRECT_INGEST_BQ_IMPORT_EXPORT_QUEUE_V2,
        DIRECT_INGEST_SCHEDULER_QUEUE_V2,
    ]
    for queue_name in direct_ingest_queue_names:
        queues.append(
            _queue_config_with_name(client_wrapper,
                                    DIRECT_INGEST_QUEUE_BASE_CONFIG,
                                    queue_name))

    # Scraper-phase queue, rate-limited to one dispatch per second on top of
    # its base config.
    scraper_phase_queue = ProtobufBuilder(queue_pb2.Queue).compose(
        _queue_config_with_name(client_wrapper,
                                SCRAPER_PHASE_QUEUE_CONFIG,
                                SCRAPER_PHASE_QUEUE_V2)
    ).update_args(
        rate_limits=queue_pb2.RateLimits(
            max_dispatches_per_second=1,
        ),
    ).build()
    queues.append(scraper_phase_queue)

    # Shared system queues.
    queues.append(
        _queue_config_with_name(client_wrapper, BIGQUERY_QUEUE_CONFIG,
                                BIGQUERY_QUEUE_V2))
    queues.append(
        _queue_config_with_name(client_wrapper, JOB_MONITOR_QUEUE_CONFIG,
                                JOB_MONITOR_QUEUE_V2))

    # One queue per vendor that declares queue params, layered over the base
    # scraper config.
    for vendor in vendors.get_vendors():
        queue_params = vendors.get_vendor_queue_params(vendor)
        if queue_params is None:
            continue
        vendor_queue_name = \
            'vendor-{}-scraper-v2'.format(vendor.replace('_', '-'))
        vendor_queue = ProtobufBuilder(queue_pb2.Queue).compose(
            _queue_config_with_name(client_wrapper,
                                    BASE_SCRAPER_QUEUE_CONFIG,
                                    vendor_queue_name)
        ).compose(queue_pb2.Queue(**queue_params)).build()
        queues.append(vendor_queue)

    # Region-specific scraper queues; regions on the shared queue or whose
    # ingest is not launched in this environment are skipped. Region-level
    # overrides, when present, are composed over the base config.
    for region in regions.get_supported_regions():
        if region.shared_queue or not region.is_ingest_launched_in_env():
            continue
        region_queue = _queue_config_with_name(client_wrapper,
                                               BASE_SCRAPER_QUEUE_CONFIG,
                                               region.get_queue_name())
        if region.queue:
            region_queue = ProtobufBuilder(queue_pb2.Queue).compose(
                region_queue
            ).compose(queue_pb2.Queue(**region.queue)).build()
        queues.append(region_queue)

    return queues