def main(argv: List[str]):
    args = parser.parse_args(argv)
    if args.verbose:
        config.debug = 1
    configure_script_logging(logger)
    azul_client = AzulClient(prefix=args.prefix,
                             num_workers=args.num_workers)
    azul_client.reset_indexer(args.catalogs,
                              purge_queues=args.purge,
                              delete_indices=args.delete,
                              create_indices=args.create or args.index and args.delete)
    if args.index:
        logger.info('Queuing notifications for reindexing ...')
        for catalog in args.catalogs:
            if args.partition_prefix_length:
                azul_client.remote_reindex(catalog, args.partition_prefix_length)
            else:
                azul_client.reindex(catalog)
        if args.wait:
            # Match max_timeout to reindex job timeout in `.gitlab-ci.yml`
            azul_client.wait_for_indexer(min_timeout=20 * 60 if config.dss_query_prefix else None,
                                         max_timeout=13 * 60 * 60)
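

# The create_indices argument above relies on `and` binding tighter than `or`,
# i.e. it is equivalent to `args.create or (args.index and args.delete)`. The
# helper below is a small self-contained check of that equivalence; it is an
# illustration only, not part of the script.

def _check_create_indices_precedence():
    for create in (False, True):
        for index in (False, True):
            for delete in (False, True):
                assert (create or index and delete) == (create or (index and delete))
    # The other parenthesisation would behave differently, for example here:
    assert (True or False and False) != ((True or False) and False)
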
def main(argv):
    configure_script_logging(logger)
    import argparse
    parser = argparse.ArgumentParser(
        description='Subscribe indexer lambda to bundle events from DSS')
    parser.add_argument('--unsubscribe', '-U',
                        dest='subscribe',
                        action='store_false',
                        default=True)
    parser.add_argument('--personal', '-p',
                        dest='shared',
                        action='store_false',
                        default=True,
                        help="Do not use the shared credentials of the Google service account "
                             "that represents the current deployment, but instead use personal "
                             "credentials for authenticating to the DSS. When specifying this "
                             "option you will need to a) run `hca dss login` prior to running "
                             "this script or b) set GOOGLE_APPLICATION_CREDENTIALS to point to "
                             "another service account's credentials. Note that this implies that "
                             "the resulting DSS subscription will be owned by a) you or b) the "
                             "other service account and that only a) you or b) someone in "
                             "possession of those credentials can modify the subscription in the "
                             "future. This is typically not what you'd want.")
    options = parser.parse_args(argv)
    dss_client = azul.dss.client()
    for catalog in config.catalogs:
        plugin = RepositoryPlugin.load(catalog)
        if isinstance(plugin, dss.Plugin):
            if options.shared:
                with aws.service_account_credentials(config.ServiceAccount.indexer):
                    subscription.manage_subscriptions(plugin,
                                                      dss_client,
                                                      subscribe=options.subscribe)
            else:
                subscription.manage_subscriptions(plugin,
                                                  dss_client,
                                                  subscribe=options.subscribe)
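

# A self-contained illustration of the store_false pattern used above: each
# destination defaults to True and passing the flag switches it off. The parser
# built here is a throwaway for demonstration, not the one defined in main().

def _demo_store_false():
    import argparse
    demo = argparse.ArgumentParser()
    demo.add_argument('--unsubscribe', '-U',
                      dest='subscribe',
                      action='store_false',
                      default=True)
    assert demo.parse_args([]).subscribe is True
    assert demo.parse_args(['--unsubscribe']).subscribe is False
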
# This script simulates a user triggering Azul endpoints via the Data Browser
# GUI.
#
# Usage:
#
# - Set $azul_locust_catalog to the desired catalog, or leave it unset to test
#   the default catalog.
#
# - Run `locust -f scripts/locust/service.py`
#
# - In a browser, go to localhost:8089
#
# For more info see https://docs.locust.io/en/stable/

log = logging.getLogger(__name__)
configure_script_logging(log)


class LocustConfig(Config):

    @cached_property
    def catalog(self) -> str:
        # Locust does not support passing command-line arguments to the script
        catalog = os.environ.get('azul_locust_catalog', self.default_catalog)
        require(catalog in self.catalogs)
        return catalog


config = LocustConfig()


@contextmanager
def main():
    configure_script_logging(log)
    register_with_sam()
    verify_sources()
    verify_source_access()
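

# The entry-point guard below is not part of the excerpt; it is the assumed,
# conventional way a script like this would be invoked directly.

if __name__ == '__main__':
    main()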
def main(argv: List[str]):
    args = parser.parse_args(argv)
    if args.verbose:
        config.debug = 1
    configure_script_logging(logger)
    azul = AzulClient(num_workers=args.num_workers)
    source_globs = set(args.sources)
    if not args.local or args.deindex:
        sources_by_catalog = defaultdict(set)
        globs_matched = set()
        for catalog in args.catalogs:
            sources = azul.catalog_sources(catalog)
            for source_glob in source_globs:
                matches = fnmatch.filter(sources, source_glob)
                if matches:
                    globs_matched.add(source_glob)
                logger.debug('Source glob %r matched sources %r in catalog %r',
                             source_glob, matches, catalog)
                sources_by_catalog[catalog].update(matches)
        unmatched = source_globs - globs_matched
        if unmatched:
            logger.warning('Source(s) not found in any catalog: %r', unmatched)
        require(any(sources_by_catalog.values()),
                'No valid sources specified for any catalog')
    else:
        if source_globs == {'*'}:
            sources_by_catalog = {
                catalog: azul.catalog_sources(catalog)
                for catalog in args.catalogs
            }
        else:
            parser.error('Cannot specify sources when performing a local reindex')
            assert False
    if args.deindex:
        require(not any((args.index, args.delete, args.create)),
                '--deindex is incompatible with --index, --create, and --delete.')
        require('*' not in source_globs,
                '--deindex is incompatible with source `*`. '
                'Use --delete instead.')
        for catalog, sources in sources_by_catalog.items():
            if sources:
                azul.deindex(catalog, sources)
    azul.reset_indexer(args.catalogs,
                       purge_queues=args.purge,
                       delete_indices=args.delete,
                       create_indices=args.create or args.index and args.delete)
    if args.index:
        logger.info('Queuing notifications for reindexing ...')
        reservation = None
        num_notifications = 0
        for catalog, sources in sources_by_catalog.items():
            if sources:
                if (
                    args.manage_slots
                    and reservation is None
                    and isinstance(azul.repository_plugin(catalog), tdr.Plugin)
                ):
                    reservation = BigQueryReservation()
                    reservation.activate()
                if not args.local:
                    azul.remote_reindex(catalog, sources)
                    num_notifications = None
                else:
                    num_notifications += azul.reindex(catalog, args.prefix)
            else:
                logger.info('Skipping catalog %r (no matching sources)', catalog)
        if args.wait:
            if num_notifications == 0:
                logger.warning('No notifications for prefix %r and catalogs %r were sent',
                               args.prefix, args.catalogs)
            else:
                azul.wait_for_indexer()
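

# A self-contained illustration of how the fnmatch-based source globs above
# select sources; the source names here are purely illustrative and not real
# catalog sources.

def _demo_source_globs():
    import fnmatch
    sources = ['tdr:project-a:snapshot/x', 'tdr:project-b:snapshot/y']
    assert fnmatch.filter(sources, 'tdr:*') == sources
    assert fnmatch.filter(sources, '*project-b*') == ['tdr:project-b:snapshot/y']
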
def main(argv):
    configure_script_logging(logger)
    p = argparse.ArgumentParser(
        description='Manage the SQS queues in the current deployment')
    sps = p.add_subparsers(help='sub-command help', dest='command')

    sps.add_parser('list', help='List SQS queues in current deployment')

    sp = sps.add_parser('dump', help='Dump contents of queue into designated file')
    sp.add_argument('queue', metavar='QUEUE_NAME',
                    help='Name of the queue to obtain messages from')
    sp.add_argument('path', metavar='FILE_PATH',
                    help='Path of file to write messages to')
    sp.add_argument('--delete', '-D',
                    action='store_true',
                    help='Remove messages from the queue after writing them to the specified '
                         'file. By default the messages will be returned to the queue')
    sp.add_argument('--no-json-body', '-J',
                    dest='json_body',
                    action='store_false',
                    help='Do not deserialize JSON in queue message body.')

    sp = sps.add_parser('feed', help='Feed messages from file back into queue')
    sp.add_argument('path', metavar='FILE_PATH',
                    help='Path of file to read messages from')
    sp.add_argument('queue', metavar='QUEUE_NAME',
                    help='Name of the queue to feed messages into')
    sp.add_argument('--force', '-F',
                    action='store_true',
                    help='Force feeding messages to a queue they did not originate from.')
    sp.add_argument('--delete', '-D',
                    action='store_true',
                    help='Remove messages from the file after submitting them to the specified '
                         'queue. By default the messages will remain in the file')

    sp = sps.add_parser('purge', help='Purge all messages in a queue')
    sp.add_argument('queue', metavar='QUEUE_NAME',
                    help='Name of the queue to purge.')

    sps.add_parser('purge_all',
                   help='Purge all messages in all queues in the current deployment. Use with '
                        'caution. The messages will be lost forever.')

    sp = sps.add_parser('dump_all',
                        help='Dump all messages in all queues in the current deployment. Each '
                             'queue will be dumped into a separate JSON file. The name of the '
                             'JSON file is the name of the queue followed by ".json"')
    sp.add_argument('--delete', '-D',
                    action='store_true',
                    help='Remove messages from each queue after writing them to its file. By '
                         'default the messages will be returned to the queue')
    sp.add_argument('--no-json-body', '-J',
                    dest='json_body',
                    action='store_false',
                    help='Do not deserialize JSON in queue message body.')

    args = p.parse_args(argv)

    if args.command in ('list', 'purge', 'purge_all'):
        queues = Queues()
        if args.command == 'list':
            queues.list()
        elif args.command == 'purge':
            queues.purge(args.queue)
        elif args.command == 'purge_all':
            queues.purge_all()
        else:
            assert False, args.command
    elif args.command in ('dump', 'dump_all'):
        queues = Queues(delete=args.delete, json_body=args.json_body)
        if args.command == 'dump':
            queues.dump(args.queue, args.path)
        elif args.command == 'dump_all':
            queues.dump_all()
        else:
            assert False, args.command
    elif args.command == 'feed':
        queues = Queues(delete=args.delete)
        queues.feed(args.path, args.queue, force=args.force)
    else:
        p.print_usage()
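

# Hypothetical invocations of the sub-commands defined above, assuming the file
# lives at scripts/manage_queues.py (the path and queue name are illustrative,
# not taken from the excerpt):
#
#   python scripts/manage_queues.py list
#   python scripts/manage_queues.py dump SOME_QUEUE messages.json
#   python scripts/manage_queues.py feed messages.json SOME_QUEUE
#   python scripts/manage_queues.py purge SOME_QUEUE
#
# The entry-point guard below is likewise an assumption about how the script is
# normally run; it is not shown in the excerpt.

if __name__ == '__main__':
    import sys
    main(sys.argv[1:])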