Beispiel #1
0
def main() -> None:
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires together the components it
    hosts: service poller and service manager, metrics collector, optional
    config streamer and sync RPC client, bootstrap manager, optional kernel
    version poller, state reporter, service health watchdog and the magmad
    RPC servicer. Optional pieces are gated by flags read from
    service.config. Once everything has been scheduled on service.loop the
    service is run, and it is closed after the run returns.
    """
    service = MagmaService('magmad', mconfigs_pb2.MagmaD())

    # Optionally pipe errors to Sentry
    sentry_init(service_name=service.name)

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    # NOTE(review): .get() yields None when 'magma_services' is not
    # configured; the `in services` check inside bootstrap_success_cb would
    # then raise TypeError -- confirm the key is always present.
    services = service.config.get('magma_services')
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services',
        [],
    )
    # The enabled dynamic services come from the mconfig; default to none
    # when the service has no mconfig.
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(
        service.loop,
        service.config,
        enabled_dynamic_services,
    )
    service_poller.start()

    service_manager = ServiceManager(
        services,
        init_system,
        service_poller,
        registered_dynamic_services,
        enabled_dynamic_services,
    )

    # Get metrics service config
    # The 'metricsd' section is indexed directly below, so a missing section
    # (None) or a missing required key fails here at startup. Only
    # 'max_grpc_msg_size_mb' (default 4), 'post_processing_fn' and
    # 'metric_scrape_targets' are optional.
    metrics_config = service.config.get('metricsd')
    metrics_services = metrics_config['services']
    collect_interval = metrics_config['collect_interval']
    sync_interval = metrics_config['sync_interval']
    grpc_timeout = metrics_config['grpc_timeout']
    grpc_msg_size = metrics_config.get('max_grpc_msg_size_mb', 4)
    metrics_post_processor_fn = metrics_config.get('post_processing_fn')

    # Each configured target must provide 'url', 'name' and 'interval'
    metric_scrape_targets = [
        ScrapeTarget(t['url'], t['name'], t['interval'])
        for t in metrics_config.get('metric_scrape_targets', [])
    ]

    # Create local metrics collector
    metrics_collector = MetricsCollector(
        services=metrics_services,
        collect_interval=collect_interval,
        sync_interval=sync_interval,
        grpc_timeout=grpc_timeout,
        grpc_max_msg_size_mb=grpc_msg_size,
        loop=service.loop,
        post_processing_fn=get_metrics_postprocessor_fn(
            metrics_post_processor_fn, ),
        scrape_targets=metric_scrape_targets,
    )

    # Poll and sync the metrics collector loops
    metrics_collector.run()

    # Start a background thread to stream updates from the cloud
    # The client is only constructed here; it is started from
    # bootstrap_success_cb on the first successful bootstrap.
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME:
                ConfigManager(
                    services,
                    service_manager,
                    service,
                    MconfigManagerImpl(),
                ),
            },
            service.loop,
        )

    # Create sync rpc client with a heartbeat of 30 seconds (timeout = 60s)
    # Like the stream client, it is started from bootstrap_success_cb.
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(
            service.loop,
            30,
            service.config.get('print_grpc_payload', False),
        )

    # Guards the one-time start of the cloud clients in the callback below
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    async def bootstrap_success_cb(certs_generated: bool):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated:
            svcs_to_restart = []
            if 'control_proxy' in services:
                svcs_to_restart.append('control_proxy')

            # fluent-bit caches TLS client certs in memory, so we need to
            # restart it whenever the certs change
            # The magmad mconfig is loaded again here instead of reusing
            # service.mconfig; a falsy dynamic_services is treated as empty.
            fresh_mconfig = get_mconfig_manager().load_service_mconfig(
                'magmad',
                mconfigs_pb2.MagmaD(),
            )
            dynamic_svcs = fresh_mconfig.dynamic_services or []
            if 'td-agent-bit' in dynamic_svcs:
                svcs_to_restart.append('td-agent-bit')

            # Called even when svcs_to_restart ended up empty
            await service_manager.restart_services(services=svcs_to_restart)

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Initialize kernel version poller if it is enabled
    # Stays None when disabled; GatewayStatusFactory receives it either way.
    kernel_version_poller = None
    if service.config.get('enable_kernel_version_checking', False):
        kernel_version_poller = KernelVersionsPoller(service)
        kernel_version_poller.start()

    # gateway status generator to bundle various information about this
    # gateway into an object.
    gateway_status_factory = GatewayStatusFactory(
        service=service,
        service_poller=service_poller,
        kernel_version_poller=kernel_version_poller,
    )

    # _grpc_client_manager to manage grpc client recycling
    # NOTE(review): the unit/meaning of max_client_reuse=60 is defined by
    # GRPCClientManager and is not visible here -- confirm before changing.
    grpc_client_manager = GRPCClientManager(
        service_name="state",
        service_stub=StateServiceStub,
        max_client_reuse=60,
    )

    # Initialize StateReporter
    state_reporter = StateReporter(
        config=service.config,
        mconfig=service.mconfig,
        loop=service.loop,
        bootstrap_manager=bootstrap_manager,
        gw_status_factory=gateway_status_factory,
        grpc_client_manager=grpc_client_manager,
    )

    # Initialize ServiceHealthWatchdog
    service_health_watchdog = ServiceHealthWatchdog(
        config=service.config,
        loop=service.loop,
        service_poller=service_poller,
        service_manager=service_manager,
    )

    # Start _bootstrap_manager
    bootstrap_manager.start_bootstrap_manager()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start state reporting loop
    state_reporter.start()

    # Start service timeout health check loop
    service_health_watchdog.start()

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Create generic command executor
    # Left as None when no 'generic_command_config' is configured; the
    # servicer below is handed whichever value results.
    command_executor = None
    if service.config.get('generic_command_config', None):
        command_executor = get_command_executor_impl(service)

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status())

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services,
        service_manager,
        get_mconfig_manager(),
        command_executor,
        service.loop,
        service.config.get('print_grpc_payload', False),
    )
    magmad_servicer.add_to_server(service.rpc_server)

    # The watchdog is only set up when SDWatchdog reports notify support
    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(
            tasks=[bootstrap_manager, state_reporter],
            update_status=True,
        )
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()
Beispiel #2
0
    def test_update(self, config_mock):
        """
        Check that a streamed mconfig update restarts only changed services.

        The stored mconfig and the streamed mconfig carry the same magmad
        config but differ in metricsd's log_level, so metricsd is the only
        service expected in the restart request.

        Args:
            config_mock: mock whose return value stands in for the service
                config (presumably injected by a patch decorator that is
                outside this view).
        """
        def _pack(message):
            # Wrap a config message in a freshly created Any container
            wrapped = Any()
            wrapped.Pack(message)
            return wrapped

        # Lightweight stand-in for the update objects delivered on a stream
        StreamUpdate = namedtuple('TestUpdate', ['value', 'key'])

        # Fixture data: the gateway moves from stored_mconfig to
        # streamed_mconfig
        stored_mconfig = GatewayConfigs()
        streamed_mconfig = GatewayConfigs()

        magmad_any = _pack(MagmaD(log_level=1))
        for gateway_cfg in (stored_mconfig, streamed_mconfig):
            gateway_cfg.configs_by_key['magmad'].CopyFrom(magmad_any)

        stored_mconfig.configs_by_key['metricsd'].CopyFrom(
            _pack(MetricsD(log_level=2)),
        )
        streamed_mconfig.configs_by_key['metricsd'].CopyFrom(
            _pack(MetricsD(log_level=3)),
        )

        # Mocked dependencies
        config_mock.return_value = {
            'magma_services': ['magmad', 'metricsd'],
        }

        @asyncio.coroutine
        def _mock_restart_services():
            return "blah"

        service_manager_mock = MagicMock()
        magmad_service_mock = MagicMock()
        mconfig_manager_mock = MconfigManagerImpl()

        patched_load = patch.object(
            mconfig_manager_mock,
            'load_mconfig',
            new=MagicMock(return_value=stored_mconfig),
        )
        patched_store = patch.object(
            mconfig_manager_mock,
            'update_stored_mconfig',
            new=Mock(),
        )
        patched_restart = patch.object(
            service_manager_mock,
            'restart_services',
            new=MagicMock(wraps=_mock_restart_services),
        )

        with patched_load as loader, \
                patched_store as updater, \
                patched_restart as restarter:
            event_loop = asyncio.new_event_loop()
            config_manager = ConfigManager(
                ['magmad', 'metricsd'],
                service_manager_mock,
                magmad_service_mock,
                mconfig_manager_mock,
                allow_unknown_fields=False,
                loop=event_loop,
            )

            # Stream two updates; the last one carries the new mconfig
            update_str = MessageToJson(streamed_mconfig)
            first_update = StreamUpdate(value='', key='some key')
            last_update = StreamUpdate(
                value=update_str.encode('utf-8'),
                key='last key',
            )
            config_manager.process_update(
                CONFIG_STREAM_NAME,
                [first_update, last_update],
                False,
            )

            # Only the metricsd config changed, so only it gets restarted
            loader.assert_called_once_with()
            restarter.assert_called_once_with(['metricsd'])
            updater.assert_called_once_with(update_str)
Beispiel #3
0
def main() -> None:
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires together the service poller,
    service manager, optional config streamer, checkin manager, optional
    sync RPC client, bootstrap manager and the magmad RPC servicer. Optional
    background loops are gated by flags read from service.config. Once
    everything has been scheduled on service.loop the service is run, and it
    is closed after the run returns.
    """
    service = MagmaService('magmad')

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    # 'magma_services' is mandatory: a missing key raises KeyError here
    services = service.config['magma_services']
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services', [])
    # The enabled dynamic services come from the mconfig; default to none
    # when the service has no mconfig.
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(service.loop, service.config)
    service_poller.start()

    service_manager = ServiceManager(services, init_system, service_poller,
                                     registered_dynamic_services,
                                     enabled_dynamic_services)

    # Start a background thread to stream updates from the cloud
    # Two stream callbacks are registered, one per stream name. The client
    # is only constructed here; it is started from bootstrap_success_cb.
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME:
                ConfigManager(
                    services,
                    service_manager,
                    service,
                    MconfigManagerImpl(),
                ),
                MCONFIG_VIEW_STREAM_NAME:
                StreamingMconfigCallback(
                    services,
                    service_manager,
                    service,
                    StreamedMconfigManager(),
                )
            },
            service.loop,
        )

    # Schedule periodic checkins
    checkin_manager = CheckinManager(service, service_poller)

    # Create sync rpc client with a timeout of 60 seconds
    # Like the stream client, it is started from bootstrap_success_cb.
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(service.loop, 60)

    # Guards the one-time startup work in the callback below
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    def bootstrap_success_cb(certs_generated):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            checkin_manager.try_checkin()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated and 'control_proxy' in services:
            # This callback is a plain function, so the restart is scheduled
            # on the loop as a task rather than awaited.
            service.loop.create_task(
                service_manager.restart_services(services=['control_proxy']))

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Forwards checkin failures to the bootstrap manager. It is defined
    # after bootstrap_manager because it closes over that name.
    def checkin_failure_cb(err_code):
        bootstrap_manager.on_checkin_fail(err_code)

    checkin_manager.set_failure_cb(checkin_failure_cb)

    # Start bootstrap_manager after checkin_manager's callback is set
    bootstrap_manager.start_bootstrap_manager()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Start the systemd tailer task if it is enabled
    if service.config.get('enable_systemd_tailer', False):
        service.loop.create_task(start_systemd_tailer(service.config))

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status(service.loop))

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services,
        service_manager,
        get_mconfig_manager(),
        service.loop,
    )
    magmad_servicer.add_to_server(service.rpc_server)

    # The watchdog is only set up when SDWatchdog reports notify support
    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(tasks=[bootstrap_manager, checkin_manager],
                                update_status=True)
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()
    def test_update(self, config_mock):
        """
        Check how ConfigManager reacts to a sequence of streamed updates.

        Three rounds are exercised against the same ConfigManager:
        an empty update batch (nothing restarted or stored), an update that
        changes only metricsd (only metricsd restarted), and an update that
        adds a 'shared_mconfig' entry (all services restarted and dynamic
        services refreshed).

        Args:
            config_mock: mock whose return value stands in for the service
                config (presumably injected by a patch decorator that is
                outside this view).
        """
        def _pack(message):
            # Wrap a config message in a freshly created Any container
            wrapped = Any()
            wrapped.Pack(message)
            return wrapped

        # Fixture data: the gateway moves from stored_mconfig to
        # streamed_mconfig
        stored_mconfig = GatewayConfigs()
        streamed_mconfig = GatewayConfigs()

        magmad_any = _pack(MagmaD(log_level=1))
        for gateway_cfg in (stored_mconfig, streamed_mconfig):
            gateway_cfg.configs_by_key['magmad'].CopyFrom(magmad_any)

        stored_mconfig.configs_by_key['metricsd'].CopyFrom(
            _pack(MetricsD(log_level=2)),
        )
        # Kept around: the same packed message is reused later as the
        # shared config payload
        metricsd_new_any = _pack(MetricsD(log_level=3))
        streamed_mconfig.configs_by_key['metricsd'].CopyFrom(metricsd_new_any)

        # Mocked dependencies
        config_mock.return_value = {
            'magma_services': ['magmad', 'metricsd'],
        }

        @asyncio.coroutine
        def _mock_restart_services():
            return "blah"

        @asyncio.coroutine
        def _mock_update_dynamic_services():
            return "mockResponse"

        service_manager_mock = MagicMock()
        magmad_service_mock = MagicMock()
        mconfig_manager_mock = MconfigManagerImpl()

        patched_load = patch.object(
            mconfig_manager_mock,
            'load_mconfig',
            new=MagicMock(return_value=stored_mconfig),
        )
        patched_store = patch.object(
            mconfig_manager_mock,
            'update_stored_mconfig',
            new=Mock(),
        )
        patched_restart = patch.object(
            service_manager_mock,
            'restart_services',
            new=MagicMock(wraps=_mock_restart_services),
        )
        patched_dynamic = patch.object(
            service_manager_mock,
            'update_dynamic_services',
            new=MagicMock(wraps=_mock_update_dynamic_services),
        )
        patched_events = patch('magma.magmad.events.processed_updates')

        class ServiceMconfigMock:
            # Stand-in for the loaded service mconfig: no dynamic services
            dynamic_services = []

            def __init__(self, service, mconfig_struct):
                pass

        patched_service_mconfig = patch(
            'magma.magmad.config_manager.load_service_mconfig',
            MagicMock(wraps=ServiceMconfigMock),
        )

        with patched_load as loader, \
                patched_store as updater, \
                patched_restart as restarter, \
                patched_dynamic as dynamic_services, \
                patched_service_mconfig as mock_c, \
                patched_events as processed_updates:
            event_loop = asyncio.new_event_loop()
            config_manager = ConfigManager(
                ['magmad', 'metricsd'],
                service_manager_mock,
                magmad_service_mock,
                mconfig_manager_mock,
                allow_unknown_fields=False,
                loop=event_loop,
            )

            # Round 1: an empty batch must not restart or store anything
            config_manager.process_update(CONFIG_STREAM_NAME, [], False)

            restarter.assert_not_called()
            updater.assert_not_called()

            # Round 2: the last update of the batch carries the new mconfig
            update_str = MessageToJson(streamed_mconfig)
            batch = [
                DataUpdate(value=b'', key='some key'),
                DataUpdate(value=update_str.encode('utf-8'), key='last key'),
            ]
            config_manager.process_update(CONFIG_STREAM_NAME, batch, False)

            # Only the metricsd config changed, so only it gets restarted
            self.assertEqual(loader.call_count, 1)
            restarter.assert_called_once_with(['metricsd'])
            updater.assert_called_once_with(update_str)

            configs_by_service = {
                name: streamed_mconfig.configs_by_key[name]
                for name in ('magmad', 'metricsd')
            }
            processed_updates.assert_called_once_with(configs_by_service)

            for used_mock in (restarter, updater, processed_updates):
                used_mock.reset_mock()

            # Round 3: add a shared config entry and stream it on its own
            streamed_mconfig.configs_by_key['shared_mconfig'].CopyFrom(
                metricsd_new_any,
            )
            update_str = MessageToJson(streamed_mconfig)
            batch = [
                DataUpdate(value=update_str.encode('utf-8'), key='last key'),
            ]
            config_manager.process_update(CONFIG_STREAM_NAME, batch, False)

            # A shared config update restarts every service
            restarter.assert_called_once_with(['magmad', 'metricsd'])
            updater.assert_called_once_with(update_str)
            dynamic_services.assert_called_once_with([])
            processed_updates.assert_called_once_with(configs_by_service)
            self.assertEqual(mock_c.call_count, 1)

            for used_mock in (
                restarter, updater, processed_updates, dynamic_services,
            ):
                used_mock.reset_mock()
Beispiel #5
0
def main() -> None:
    """
    Main magmad function

    Creates the 'magmad' MagmaService and wires together the service poller,
    service manager, metrics collector, optional config streamer, checkin
    manager, optional sync RPC client, bootstrap manager, state reporter and
    the magmad RPC servicer. Optional background loops are gated by flags
    read from service.config. Once everything has been scheduled on
    service.loop the service is run, and it is closed after the run returns.
    """
    service = MagmaService('magmad', mconfigs_pb2.MagmaD())

    logging.info('Starting magmad for UUID: %s', snowflake.make_snowflake())

    # Create service manager
    # 'magma_services' is mandatory: a missing key raises KeyError here
    services = service.config['magma_services']
    init_system = service.config.get('init_system', 'systemd')
    registered_dynamic_services = service.config.get(
        'registered_dynamic_services', [])
    # The enabled dynamic services come from the mconfig; default to none
    # when the service has no mconfig.
    enabled_dynamic_services = []
    if service.mconfig is not None:
        enabled_dynamic_services = service.mconfig.dynamic_services

    # Poll the services' Service303 interface
    service_poller = ServicePoller(service.loop, service.config)
    service_poller.start()

    service_manager = ServiceManager(services, init_system, service_poller,
                                     registered_dynamic_services,
                                     enabled_dynamic_services)

    # Get metrics service config
    # The 'metricsd' section and every key below except 'post_processing_fn'
    # are mandatory; a missing one raises KeyError here at startup.
    metrics_config = service.config['metricsd']
    metrics_services = metrics_config['services']
    collect_interval = metrics_config['collect_interval']
    sync_interval = metrics_config['sync_interval']
    grpc_timeout = metrics_config['grpc_timeout']
    queue_length = metrics_config['queue_length']
    metrics_post_processor_fn = metrics_config.get('post_processing_fn')

    # Create local metrics collector
    metrics_collector = MetricsCollector(
        metrics_services,
        collect_interval,
        sync_interval,
        grpc_timeout,
        queue_length,
        service.loop,
        get_metrics_postprocessor_fn(metrics_post_processor_fn),
    )

    # Poll and sync the metrics collector loops
    metrics_collector.run()

    # Start a background thread to stream updates from the cloud
    # The client is only constructed here; it is started from
    # bootstrap_success_cb on the first successful bootstrap.
    stream_client = None
    if service.config.get('enable_config_streamer', False):
        stream_client = StreamerClient(
            {
                CONFIG_STREAM_NAME:
                ConfigManager(
                    services,
                    service_manager,
                    service,
                    MconfigManagerImpl(),
                ),
            },
            service.loop,
        )

    # Schedule periodic checkins
    checkin_manager = CheckinManager(service, service_poller)

    # Create sync rpc client with a heartbeat of 30 seconds (timeout = 60s)
    # Like the stream client, it is started from bootstrap_success_cb.
    sync_rpc_client = None
    if service.config.get('enable_sync_rpc', False):
        sync_rpc_client = SyncRPCClient(service.loop, 30)

    # Guards the one-time startup work in the callback below
    first_time_bootstrap = True

    # This is called when bootstrap succeeds and when _bootstrap_check is
    # invoked but bootstrap is not needed. If it's invoked right after certs
    # are generated, certs_generated is true, control_proxy will restart.
    async def bootstrap_success_cb(certs_generated):
        nonlocal first_time_bootstrap
        if first_time_bootstrap:
            if stream_client:
                stream_client.start()
            # The first checkin is awaited before the sync RPC client starts
            await checkin_manager.try_checkin()
            if sync_rpc_client:
                sync_rpc_client.start()
            first_time_bootstrap = False
        if certs_generated and 'control_proxy' in services:
            # The restart is scheduled on the loop as a task, not awaited
            service.loop.create_task(
                service_manager.restart_services(services=['control_proxy']))

    # Create bootstrap manager
    bootstrap_manager = BootstrapManager(service, bootstrap_success_cb)

    # Forwards checkin failures to the bootstrap manager. It is defined
    # after bootstrap_manager because it closes over that name.
    async def checkin_failure_cb(err_code):
        await bootstrap_manager.on_checkin_fail(err_code)

    checkin_manager.set_failure_cb(checkin_failure_cb)

    # Start bootstrap_manager after checkin_manager's callback is set
    bootstrap_manager.start_bootstrap_manager()

    # Schedule periodic state reporting
    state_manager = StateReporter(service, checkin_manager)
    state_manager.start()

    # Start all services when magmad comes up
    service.loop.create_task(service_manager.start_services())

    # Start upgrade manager loop
    if service.config.get('enable_upgrade_manager', False):
        upgrader = _get_upgrader_impl(service)
        service.loop.create_task(start_upgrade_loop(service, upgrader))

    # Start network health metric collection
    if service.config.get('enable_network_monitor', False):
        service.loop.create_task(metrics_collection_loop(service.config))

    # Start the systemd tailer task if it is enabled
    if service.config.get('enable_systemd_tailer', False):
        service.loop.create_task(start_systemd_tailer(service.config))

    # Create generic command executor
    # Left as None when no 'generic_command_config' is configured; the
    # servicer below is handed whichever value results.
    command_executor = None
    if service.config.get('generic_command_config', None):
        command_executor = get_command_executor_impl(service)

    # Start loop to monitor unattended upgrade status
    service.loop.create_task(monitor_unattended_upgrade_status(service.loop))

    # Add all servicers to the server
    magmad_servicer = MagmadRpcServicer(
        service,
        services,
        service_manager,
        get_mconfig_manager(),
        command_executor,
        service.loop,
    )
    magmad_servicer.add_to_server(service.rpc_server)

    # The watchdog is only set up when SDWatchdog reports notify support
    if SDWatchdog.has_notify():
        # Create systemd watchdog
        sdwatchdog = SDWatchdog(tasks=[bootstrap_manager, checkin_manager],
                                update_status=True)
        # Start watchdog loop
        service.loop.create_task(sdwatchdog.run())

    # Run the service loop
    service.run()

    # Cleanup the service
    service.close()