Example #1
0
def main():
    """
    Entry point of the CTF Gameserver Controller.

    Parses the command line, configures logging, connects to the database, checks the database grants,
    optionally starts the metrics server and then calls `main_loop_step()` in an endless loop.

    Returns:
        An `os.EX_*` exit code when start-up fails (database unreachable, missing database permissions
        or malformed `--metrics-listen` value). Otherwise the function never returns.
    """

    parser = get_arg_parser_with_db('CTF Gameserver Controller')
    parser.add_argument('--nonstop', action='store_true',
                        help='Use current time as start time and ignore CTF end time from the database. '
                        'Useful for testing checkers.')
    parser.add_argument('--metrics-listen',
                        help='Expose Prometheus metrics via HTTP ("<host>:<port>")')
    options = parser.parse_args()

    # Set up the root logger with the level name given on the command line
    logging.basicConfig(format='[%(levelname)s] %(message)s')
    logging.getLogger().setLevel(getattr(logging, options.loglevel.upper()))

    try:
        connection = psycopg2.connect(host=options.dbhost, database=options.dbname,
                                      user=options.dbuser, password=options.dbpassword)
    except psycopg2.OperationalError as error:
        logging.error('Could not establish database connection: %s', error)
        return os.EX_UNAVAILABLE
    logging.info('Established database connection')

    # Always work with (timezone-aware) UTC dates and times to keep reasoning about them simple
    with transaction_cursor(connection) as cursor:
        cursor.execute('SET TIME ZONE "UTC"')

    # Dry-run the database operations to detect missing grants before entering the main loop
    try:
        try:
            database.get_control_info(connection, prohibit_changes=True)
        except DBDataError as error:
            # Incomplete data is only worth a warning here, we are just interested in the permissions
            logging.warning('Invalid database state: %s', error)

        database.increase_tick(connection, prohibit_changes=True)
    except psycopg2.ProgrammingError as error:
        if error.pgcode != postgres_errors.INSUFFICIENT_PRIVILEGE:
            raise
        # Only the backtrace reveals which kind of permission is missing, so log the whole exception
        logging.exception('Missing database permissions:')
        return os.EX_NOPERM

    listen_spec = options.metrics_listen
    if listen_spec is not None:
        try:
            listen_host, listen_port, listen_family = parse_host_port(listen_spec)
        except ValueError:
            logging.error('Metrics listen address needs to be specified as "<host>:<port>"')
            return os.EX_USAGE

        start_metrics_server(listen_host, listen_port, listen_family)

    controller_metrics = make_metrics(connection)
    controller_metrics['start_timestamp'].set(time.time())

    daemon.notify('READY=1')

    while True:
        main_loop_step(connection, controller_metrics, options.nonstop)
Example #2
0
 def test_ipv4(self):
     """An IPv4 address with port is split into host, integer port and `AF_INET` family."""
     address, port_number, address_family = args.parse_host_port('127.0.0.1:22')
     self.assertEqual((address, port_number, address_family),
                      ('127.0.0.1', 22, socket.AF_INET))
Example #3
0
 def test_invalid(self):
     """Parsing the bare string '::1' must raise `ValueError`."""
     self.assertRaises(ValueError, args.parse_host_port, '::1')
Example #4
0
 def test_hostname(self):
     """A host name with port is accepted; only the parsed port (second element) is checked."""
     port_number = args.parse_host_port('localhost:1337')[1]
     self.assertEqual(port_number, 1337)
Example #5
0
 def test_ipv6(self):
     """A bracketed IPv6 address with port yields the address without brackets and `AF_INET6`."""
     address, port_number, address_family = args.parse_host_port('[::1]:8000')
     self.assertEqual((address, port_number, address_family),
                      ('::1', 8000, socket.AF_INET6))
Example #6
0
def main():
    """
    Entry point of the CTF Gameserver Checker Master.

    Parses and validates the command line, configures Checker Script logging and metrics, connects to the
    database, checks the database grants and then runs the `MasterLoop` until SIGTERM has been received
    and no Checker Scripts are running anymore.

    Returns:
        `os.EX_OK` after a graceful shutdown, `os.EX_USAGE` for invalid arguments or missing optional
        modules, `os.EX_UNAVAILABLE` if the database connection fails, `os.EX_NOPERM` if database
        permissions are missing and `os.EX_SOFTWARE` if the master loop aborts with an unexpected error.
    """

    arg_parser = get_arg_parser_with_db('CTF Gameserver Checker Master')
    arg_parser.add_argument('--ippattern', type=str, required=True,
                            help='(Old-style) Python formatstring for building the IP to connect to')
    arg_parser.add_argument('--flagsecret', type=str, required=True,
                            help='Base64 string used as secret in flag generation')
    arg_parser.add_argument('--metrics-listen', type=str, help='Expose Prometheus metrics via HTTP '
                            '("<host>:<port>")')

    group = arg_parser.add_argument_group('check', 'Check parameters')
    group.add_argument('--service', type=str, required=True,
                       help='Slug of the service')
    group.add_argument('--checkerscript', type=str, required=True,
                       help='Path of the Checker Script')
    group.add_argument('--sudouser', type=str, help='User to execute the Checker Scripts as, will be passed '
                       'to `sudo -u`')
    group.add_argument('--stddeviations', type=float, default=2.0,
                       help='Consider past runtimes within this number of standard deviations when '
                       'estimating Checker Script runtime (default: 2)')
    group.add_argument('--checkercount', type=int, required=True,
                       help='Number of Checker Masters running for this service')
    group.add_argument('--interval', type=float, required=True,
                       help='Time between launching batches of Checker Scripts in seconds')

    group = arg_parser.add_argument_group('logging', 'Checker Script logging')
    group.add_argument('--journald', action='store_true', help='Log Checker Script messages to journald')
    group.add_argument('--gelf-server', help='Log Checker Script messages to the specified GELF (Graylog) '
                       'server ("<host>:<port>")')

    args = arg_parser.parse_args()

    logging.basicConfig(format='[%(levelname)s] %(message)s [%(name)s]')
    numeric_loglevel = getattr(logging, args.loglevel.upper())
    logging.getLogger().setLevel(numeric_loglevel)

    if args.interval < 3:
        logging.error('`--interval` must be at least 3 seconds')
        return os.EX_USAGE

    # Decode the flag secret together with the other argument checks, so that a malformed value is
    # reported as usage error before any child processes get started
    try:
        flag_secret = base64.b64decode(args.flagsecret)
    except ValueError:
        # Covers `binascii.Error` (e.g. incorrect padding), which is a subclass of `ValueError`
        logging.error('`--flagsecret` must be a valid Base64 string')
        return os.EX_USAGE

    logging_params = {}

    # Configure logging
    if args.journald:
        try:
            # Imported only to verify that the module is available
            # pylint: disable=import-outside-toplevel,unused-import,import-error
            from systemd.journal import JournalHandler
        except ImportError:
            logging.error('systemd module is required for journald logging')
            return os.EX_USAGE
        logging_params['journald'] = True

    if args.gelf_server is not None:
        try:
            # Imported only to verify that the module is available
            # pylint: disable=import-outside-toplevel,unused-import,import-error
            import graypy
        except ImportError:
            logging.error('graypy module is required for GELF logging')
            return os.EX_USAGE
        try:
            gelf_host, gelf_port, gelf_family = parse_host_port(args.gelf_server)
        except ValueError:
            logging.error('GELF server needs to be specified as "<host>:<port>"')
            return os.EX_USAGE
        logging_params['gelf'] = {'host': gelf_host, 'port': gelf_port, 'family': gelf_family}

    # Configure metrics
    if args.metrics_listen is not None:
        try:
            metrics_host, metrics_port, metrics_family = parse_host_port(args.metrics_listen)
        except ValueError:
            logging.error('Metrics listen address needs to be specified as "<host>:<port>"')
            return os.EX_USAGE

        metrics_queue = multiprocessing.Queue()
        metrics_recv, metrics_send = multiprocessing.Pipe()
        metrics_collector_process = multiprocessing.Process(
            target=metrics.run_collector,
            args=(args.service, metrics.checker_metrics_factory, metrics_queue, metrics_send)
        )
        # Terminate the process when the parent process exits
        metrics_collector_process.daemon = True
        metrics_collector_process.start()
        logging.info('Started metrics collector process')
        metrics_server_process = multiprocessing.Process(
            target=metrics.run_http_server,
            args=(metrics_host, metrics_port, metrics_family, metrics_queue, metrics_recv)
        )
        metrics_server_process.daemon = True
        metrics_server_process.start()
        logging.info('Started metrics HTTP server process')

        metrics.set(metrics_queue, 'interval_length_seconds', args.interval)
        metrics.set(metrics_queue, 'start_timestamp', time.time())
    else:
        # Without a listen address, metrics go to a stand-in queue object
        metrics_queue = metrics.DummyQueue()

    # Connect to databases
    try:
        db_conn = psycopg2.connect(host=args.dbhost, database=args.dbname, user=args.dbuser,
                                   password=args.dbpassword)
    except psycopg2.OperationalError as e:
        logging.error('Could not establish connection to database: %s', e)
        return os.EX_UNAVAILABLE
    logging.info('Established connection to database')

    # Keep our mental model easy by always using (timezone-aware) UTC for dates and times
    with transaction_cursor(db_conn) as cursor:
        cursor.execute('SET TIME ZONE "UTC"')

    # Check database grants by calling every database function once with `prohibit_changes=True`
    try:
        try:
            database.get_control_info(db_conn, prohibit_changes=True)
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)
        try:
            service_id = database.get_service_attributes(db_conn, args.service,
                                                         prohibit_changes=True)['id']
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)
            service_id = 1337    # Use dummy value for subsequent grant checks
        try:
            database.get_current_tick(db_conn, prohibit_changes=True)
        except DBDataError as e:
            logging.warning('Invalid database state: %s', e)

        database.get_task_count(db_conn, service_id, prohibit_changes=True)
        database.get_new_tasks(db_conn, service_id, 1, prohibit_changes=True)
        database.commit_result(db_conn, service_id, 1, 2147483647, 0, prohibit_changes=True, fake_team_id=1)
        database.set_flagid(db_conn, service_id, 1, 0, 'id', prohibit_changes=True, fake_team_id=1)
        database.load_state(db_conn, service_id, 1, 'key', prohibit_changes=True)
        database.store_state(db_conn, service_id, 1, 'key', 'data', prohibit_changes=True)
    except psycopg2.ProgrammingError as e:
        if e.pgcode == postgres_errors.INSUFFICIENT_PRIVILEGE:
            # Log full exception because only the backtrace will tell which kind of permission is missing
            logging.exception('Missing database permissions:')
            return os.EX_NOPERM
        else:
            raise

    daemon.notify('READY=1')

    # Creating the MasterLoop may fail with `DBDataError`, retry once per minute until it succeeds
    while True:
        try:
            master_loop = MasterLoop(db_conn, args.service, args.checkerscript, args.sudouser,
                                     args.stddeviations, args.checkercount, args.interval, args.ippattern,
                                     flag_secret, logging_params, metrics_queue)
            break
        except DBDataError as e:
            logging.warning('Waiting for valid database state: %s', e)
            time.sleep(60)

    # Graceful shutdown to prevent loss of check results
    def sigterm_handler(_, __):
        logging.info('Shutting down, waiting for %d Checker Scripts to finish',
                     master_loop.get_running_script_count())
        master_loop.shutting_down = True
    signal.signal(signal.SIGTERM, sigterm_handler)

    try:
        while True:
            master_loop.step()
            # After SIGTERM, keep stepping until the last running Checker Script has finished
            if master_loop.shutting_down and master_loop.get_running_script_count() == 0:
                break
    except:    # noqa, pylint: disable=bare-except
        # Deliberately catch everything, so that the runners get terminated on any kind of abort
        logging.exception('Aborting due to unexpected error:')
        master_loop.supervisor.terminate_runners()
        return os.EX_SOFTWARE

    return os.EX_OK