def main():
    """Entry point of the controller daemon.

    Parses command-line arguments, connects to the game database, verifies
    that the configured database role has sufficient grants, optionally
    starts a Prometheus metrics endpoint and then runs the controller main
    loop forever.

    Returns an ``os.EX_*`` exit code on (early) failure; otherwise it never
    returns because of the endless main loop at the bottom.
    """
    arg_parser = get_arg_parser_with_db('CTF Gameserver Controller')
    arg_parser.add_argument('--nonstop', action='store_true', help='Use current time as start time and '
                            'ignore CTF end time from the database. Useful for testing checkers.')
    arg_parser.add_argument('--metrics-listen', help='Expose Prometheus metrics via HTTP ("<host>:<port>")')

    args = arg_parser.parse_args()

    logging.basicConfig(format='[%(levelname)s] %(message)s')
    numeric_loglevel = getattr(logging, args.loglevel.upper())
    logging.getLogger().setLevel(numeric_loglevel)

    try:
        db_conn = psycopg2.connect(host=args.dbhost, database=args.dbname, user=args.dbuser,
                                   password=args.dbpassword)
    except psycopg2.OperationalError as e:
        logging.error('Could not establish database connection: %s', e)
        return os.EX_UNAVAILABLE
    logging.info('Established database connection')

    # Keep our mental model easy by always using (timezone-aware) UTC for dates and times
    with transaction_cursor(db_conn) as cursor:
        cursor.execute('SET TIME ZONE "UTC"')

    # Check database grants by dry-running (prohibit_changes=True) the queries the
    # controller will later issue for real
    try:
        try:
            database.get_control_info(db_conn, prohibit_changes=True)
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)

        database.increase_tick(db_conn, prohibit_changes=True)
    except psycopg2.ProgrammingError as e:
        if e.pgcode == postgres_errors.INSUFFICIENT_PRIVILEGE:
            # Log full exception because only the backtrace will tell which kind of permission is missing
            logging.exception('Missing database permissions:')
            return os.EX_NOPERM
        else:
            raise

    if args.metrics_listen is not None:
        try:
            metrics_host, metrics_port, metrics_family = parse_host_port(args.metrics_listen)
        except ValueError:
            logging.error('Metrics listen address needs to be specified as "<host>:<port>"')
            return os.EX_USAGE

        start_metrics_server(metrics_host, metrics_port, metrics_family)

    metrics = make_metrics(db_conn)
    metrics['start_timestamp'].set(time.time())

    # Tell systemd (or a compatible supervisor) that startup has finished
    daemon.notify('READY=1')

    # Endless main loop; never returns under normal operation
    while True:
        main_loop_step(db_conn, metrics, args.nonstop)
def test_ipv4(self):
    """A plain IPv4 address with port must be split into host, port and family."""
    result = args.parse_host_port('127.0.0.1:22')
    self.assertEqual(result[0], '127.0.0.1')
    self.assertEqual(result[1], 22)
    self.assertEqual(result[2], socket.AF_INET)
def test_invalid(self):
    """An address without an explicit port (a bare IPv6 address) must be rejected."""
    self.assertRaises(ValueError, args.parse_host_port, '::1')
def test_hostname(self):
    """Host names are accepted as-is; only the port component gets parsed."""
    _host, port, _family = args.parse_host_port('localhost:1337')
    self.assertEqual(port, 1337)
def test_ipv6(self):
    """A bracketed IPv6 address must yield the bare address and AF_INET6."""
    parsed = args.parse_host_port('[::1]:8000')
    self.assertEqual(parsed[0], '::1')
    self.assertEqual(parsed[1], 8000)
    self.assertEqual(parsed[2], socket.AF_INET6)
def main():
    """Entry point of the Checker Master daemon.

    Parses command-line arguments, sets up Checker Script logging (journald
    and/or GELF), optionally spawns metrics collector/HTTP server processes,
    connects to the game database and dry-runs all queries to verify grants,
    then runs the MasterLoop until a SIGTERM shutdown completes or an
    unexpected error aborts it.

    Returns an ``os.EX_*`` exit code.

    Fix: the ``--sudouser`` help text contained a stray leading space and the
    typo "excute"; it now reads "User to execute ...".
    """
    arg_parser = get_arg_parser_with_db('CTF Gameserver Checker Master')
    arg_parser.add_argument('--ippattern', type=str, required=True,
                            help='(Old-style) Python formatstring for building the IP to connect to')
    arg_parser.add_argument('--flagsecret', type=str, required=True,
                            help='Base64 string used as secret in flag generation')
    arg_parser.add_argument('--metrics-listen', type=str, help='Expose Prometheus metrics via HTTP '
                            '("<host>:<port>")')

    group = arg_parser.add_argument_group('check', 'Check parameters')
    group.add_argument('--service', type=str, required=True, help='Slug of the service')
    group.add_argument('--checkerscript', type=str, required=True, help='Path of the Checker Script')
    group.add_argument('--sudouser', type=str, help='User to execute the Checker Scripts as, will be passed '
                       'to `sudo -u`')
    group.add_argument('--stddeviations', type=float, default=2.0,
                       help='Consider past runtimes within this number of standard deviations when '
                       'estimating Checker Script runtime (default: 2)')
    group.add_argument('--checkercount', type=int, required=True,
                       help='Number of Checker Masters running for this service')
    group.add_argument('--interval', type=float, required=True,
                       help='Time between launching batches of Checker Scripts in seconds')

    group = arg_parser.add_argument_group('logging', 'Checker Script logging')
    group.add_argument('--journald', action='store_true', help='Log Checker Script messages to journald')
    group.add_argument('--gelf-server', help='Log Checker Script messages to the specified GELF (Graylog) '
                       'server ("<host>:<port>")')

    args = arg_parser.parse_args()

    logging.basicConfig(format='[%(levelname)s] %(message)s [%(name)s]')
    numeric_loglevel = getattr(logging, args.loglevel.upper())
    logging.getLogger().setLevel(numeric_loglevel)

    if args.interval < 3:
        logging.error('`--interval` must be at least 3 seconds')
        return os.EX_USAGE

    logging_params = {}

    # Configure logging: only probe for the optional third-party modules here,
    # the actual handlers are set up by the Checker Script runners
    if args.journald:
        try:
            # pylint: disable=import-outside-toplevel,unused-import,import-error
            from systemd.journal import JournalHandler
        except ImportError:
            logging.error('systemd module is required for journald logging')
            return os.EX_USAGE
        logging_params['journald'] = True

    if args.gelf_server is not None:
        try:
            # pylint: disable=import-outside-toplevel,unused-import,import-error
            import graypy
        except ImportError:
            logging.error('graypy module is required for GELF logging')
            return os.EX_USAGE

        try:
            gelf_host, gelf_port, gelf_family = parse_host_port(args.gelf_server)
        except ValueError:
            logging.error('GELF server needs to be specified as "<host>:<port>"')
            return os.EX_USAGE

        logging_params['gelf'] = {'host': gelf_host, 'port': gelf_port, 'family': gelf_family}

    # Configure metrics: collector and HTTP server each run in their own process,
    # communicating through a queue and a pipe
    if args.metrics_listen is not None:
        try:
            metrics_host, metrics_port, metrics_family = parse_host_port(args.metrics_listen)
        except ValueError:
            logging.error('Metrics listen address needs to be specified as "<host>:<port>"')
            return os.EX_USAGE

        metrics_queue = multiprocessing.Queue()
        metrics_recv, metrics_send = multiprocessing.Pipe()

        metrics_collector_process = multiprocessing.Process(
            target=metrics.run_collector,
            args=(args.service, metrics.checker_metrics_factory, metrics_queue, metrics_send)
        )
        # Terminate the process when the parent process exits
        metrics_collector_process.daemon = True
        metrics_collector_process.start()
        logging.info('Started metrics collector process')

        metrics_server_process = multiprocessing.Process(
            target=metrics.run_http_server,
            args=(metrics_host, metrics_port, metrics_family, metrics_queue, metrics_recv)
        )
        metrics_server_process.daemon = True
        metrics_server_process.start()
        logging.info('Started metrics HTTP server process')

        metrics.set(metrics_queue, 'interval_length_seconds', args.interval)
        metrics.set(metrics_queue, 'start_timestamp', time.time())
    else:
        # No-op stand-in so the rest of the code can use the queue unconditionally
        metrics_queue = metrics.DummyQueue()

    flag_secret = base64.b64decode(args.flagsecret)

    # Connect to databases
    try:
        db_conn = psycopg2.connect(host=args.dbhost, database=args.dbname, user=args.dbuser,
                                   password=args.dbpassword)
    except psycopg2.OperationalError as e:
        logging.error('Could not establish connection to database: %s', e)
        return os.EX_UNAVAILABLE
    logging.info('Established connection to database')

    # Keep our mental model easy by always using (timezone-aware) UTC for dates and times
    with transaction_cursor(db_conn) as cursor:
        cursor.execute('SET TIME ZONE "UTC"')

    # Check database grants by dry-running (prohibit_changes=True) every query the
    # Master will later issue for real
    try:
        try:
            database.get_control_info(db_conn, prohibit_changes=True)
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)

        try:
            service_id = database.get_service_attributes(db_conn, args.service, prohibit_changes=True)['id']
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)
            service_id = 1337    # Use dummy value for subsequent grant checks

        try:
            database.get_current_tick(db_conn, prohibit_changes=True)
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)

        database.get_task_count(db_conn, service_id, prohibit_changes=True)
        database.get_new_tasks(db_conn, service_id, 1, prohibit_changes=True)
        database.commit_result(db_conn, service_id, 1, 2147483647, 0, prohibit_changes=True, fake_team_id=1)
        database.set_flagid(db_conn, service_id, 1, 0, 'id', prohibit_changes=True, fake_team_id=1)
        database.load_state(db_conn, service_id, 1, 'key', prohibit_changes=True)
        database.store_state(db_conn, service_id, 1, 'key', 'data', prohibit_changes=True)
    except psycopg2.ProgrammingError as e:
        if e.pgcode == postgres_errors.INSUFFICIENT_PRIVILEGE:
            # Log full exception because only the backtrace will tell which kind of permission is missing
            logging.exception('Missing database permissions:')
            return os.EX_NOPERM
        else:
            raise

    # Tell systemd (or a compatible supervisor) that startup has finished
    daemon.notify('READY=1')

    # Retry until the database reaches a state valid enough to construct the loop
    while True:
        try:
            master_loop = MasterLoop(db_conn, args.service, args.checkerscript, args.sudouser,
                                     args.stddeviations, args.checkercount, args.interval, args.ippattern,
                                     flag_secret, logging_params, metrics_queue)
            break
        except DBDataError as e:
            logging.warning('Waiting for valid database state: %s', e)
            time.sleep(60)

    # Graceful shutdown to prevent loss of check results
    def sigterm_handler(_, __):
        logging.info('Shutting down, waiting for %d Checker Scripts to finish',
                     master_loop.get_running_script_count())
        master_loop.shutting_down = True
    signal.signal(signal.SIGTERM, sigterm_handler)

    try:
        # Step until a shutdown was requested and all running scripts have finished
        while True:
            master_loop.step()
            if master_loop.shutting_down and master_loop.get_running_script_count() == 0:
                break
    except:    # noqa, pylint: disable=bare-except
        logging.exception('Aborting due to unexpected error:')
        master_loop.supervisor.terminate_runners()
        return os.EX_SOFTWARE

    return os.EX_OK