Example #1
    def handle(self, *arg, **options):
        if options.get('status'):
            print(Control('dispatcher').status())
            return
        if options.get('running'):
            print(Control('dispatcher').running())
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and cache connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()

        # spawn a daemon thread that periodically enqueues scheduled tasks
        # (like the node heartbeat)
        periodic.run_continuously()

        consumer = None

        try:
            queues = ['tower_broadcast_all', get_local_queuename()]
            consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues,
                                     AutoscalePool(min_workers=4))
            consumer.run()
        except KeyboardInterrupt:
            logger.debug('Terminating Task Dispatcher')
            if consumer:
                consumer.stop()
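
These snippets only show handle(); the --status/--running/--reload options it reads would be declared in the command's add_arguments() hook. A minimal companion sketch, assuming standard Django argparse wiring (the flag help strings are guesses, not taken from the source):

    def add_arguments(self, parser):
        # Hypothetical declarations matching the options.get() calls above.
        parser.add_argument('--status', action='store_true',
                            help='print the internal state of the dispatcher')
        parser.add_argument('--running', action='store_true',
                            help='print any tasks the dispatcher is currently running')
        parser.add_argument('--reload', action='store_true',
                            help='ask the dispatcher to recycle all of its workers')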
Example #2
    def handle(self, *arg, **options):
        if options.get('status'):
            print(Control('dispatcher').status())
            return
        if options.get('running'):
            print(Control('dispatcher').running())
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and memcached connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()

        # spawn a daemon thread that periodically enqueues scheduled tasks
        # (like the node heartbeat)
        periodic.run_continuously()

        reaper.reap()
        consumer = None

        # don't ship external logs inside the dispatcher's parent process
        # this exists to work around a race condition + deadlock bug on fork
        # in cpython itself:
        # https://bugs.python.org/issue37429
        AWXProxyHandler.disable()
        with Connection(settings.BROKER_URL) as conn:
            try:
                bcast = 'tower_broadcast_all'
                queues = [
                    Queue(q, Exchange(q), routing_key=q)
                    for q in (settings.AWX_CELERY_QUEUES_STATIC + [get_local_queuename()])
                ]
                queues.append(
                    Queue(
                        construct_bcast_queue_name(bcast),
                        exchange=Exchange(bcast, type='fanout'),
                        routing_key=bcast,
                        reply=True
                    )
                )
                consumer = AWXConsumer(
                    'dispatcher',
                    conn,
                    TaskWorker(),
                    queues,
                    AutoscalePool(min_workers=4)
                )
                consumer.run()
            except KeyboardInterrupt:
                logger.debug('Terminating Task Dispatcher')
                if consumer:
                    consumer.stop()
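
What distinguishes this variant is the fanout exchange: each node binds its own broadcast queue (via construct_bcast_queue_name) to 'tower_broadcast_all', so a single publish reaches every dispatcher in the cluster. A minimal kombu publishing sketch, with a placeholder broker URL and payload:

    from kombu import Connection, Exchange, Producer

    exchange = Exchange('tower_broadcast_all', type='fanout')
    with Connection('amqp://guest:guest@localhost//') as conn:
        producer = Producer(conn.channel(), exchange=exchange)
        # A fanout exchange ignores the routing key; every queue bound to
        # it (one per node) receives its own copy of the message.
        producer.publish({'control': 'reload'}, serializer='json')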
Example #3
    def handle(self, *arg, **options):
        if options.get('status'):
            print(Control('dispatcher').status())
            return
        if options.get('running'):
            print(Control('dispatcher').running())
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and memcached connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()
        beat = Process(target=self.beat)
        beat.daemon = True
        beat.start()

        reaper.reap()
        consumer = None
        with Connection(settings.BROKER_URL) as conn:
            try:
                bcast = 'tower_broadcast_all'
                queues = [
                    Queue(q, Exchange(q), routing_key=q)
                    for q in (settings.AWX_CELERY_QUEUES_STATIC + [get_local_queuename()])
                ]
                queues.append(
                    Queue(
                        construct_bcast_queue_name(bcast),
                        exchange=Exchange(bcast, type='fanout'),
                        routing_key=bcast,
                        reply=True
                    )
                )
                consumer = AWXConsumer(
                    'dispatcher',
                    conn,
                    TaskWorker(),
                    queues,
                    AutoscalePool(min_workers=4)
                )
                consumer.run()
            except KeyboardInterrupt:
                logger.debug('Terminating Task Dispatcher')
                if consumer:
                    consumer.stop()
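
Unlike the later examples, this version predates the periodic.run_continuously() helper and instead forks a daemonized multiprocessing.Process around self.beat to enqueue scheduled work. A self-contained sketch of that pattern (the interval and loop body are assumptions for illustration):

    import time
    from multiprocessing import Process

    def beat():
        # Hypothetical beat loop: wake periodically and enqueue
        # scheduled tasks such as the node heartbeat.
        while True:
            print('enqueue scheduled tasks here')
            time.sleep(60)

    beat_process = Process(target=beat)
    beat_process.daemon = True  # exits with the parent instead of outliving it
    beat_process.start()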
Example #4
    def handle(self, *arg, **options):
        if options.get('status'):
            print(Control('dispatcher').status())
            return
        if options.get('running'):
            print(Control('dispatcher').running())
            return
        if options.get('reload'):
            return Control('dispatcher').control({'control': 'reload'})

        # It's important to close these because we're _about_ to fork, and we
        # don't want the forked processes to inherit the open sockets
        # for the DB and memcached connections (that way lies race conditions)
        django_connection.close()
        django_cache.close()

        # spawn a daemon thread that periodically enqueues scheduled tasks
        # (like the node heartbeat)
        periodic.run_continuously()

        reaper.reap()
        consumer = None

        # don't ship external logs inside the dispatcher's parent process
        # this exists to work around a race condition + deadlock bug on fork
        # in cpython itself:
        # https://bugs.python.org/issue37429
        AWXProxyHandler.disable()
        try:
            queues = ['tower_broadcast_all', get_local_queuename()]
            consumer = AWXConsumerPG('dispatcher', TaskWorker(), queues,
                                     AutoscalePool(min_workers=4))
            consumer.run()
        except KeyboardInterrupt:
            logger.debug('Terminating Task Dispatcher')
            if consumer:
                consumer.stop()
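
Every variant closes the DB and cache handles for the same reason: after a fork, parent and children would share one socket, and their interleaved reads and writes corrupt the connection. A minimal sketch of the close-before-fork pattern inside a configured Django process (standalone, not part of the snippet above):

    import os

    from django.core.cache import cache
    from django.db import connection

    # Close shared sockets first so each process reopens its own.
    connection.close()
    cache.close()

    pid = os.fork()
    if pid == 0:
        # Child: the first ORM or cache call transparently opens a
        # fresh connection owned by this process alone.
        pass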
Example #5
    def setup_method(self, test_method):
        self.pool = AutoscalePool(min_workers=2, max_workers=10)
Example #6
class TestAutoScaling:
    def setup_method(self, test_method):
        self.pool = AutoscalePool(min_workers=2, max_workers=10)

    def teardown_method(self, test_method):
        self.pool.stop(signal.SIGTERM)

    def test_scale_up(self):
        result_queue = multiprocessing.Queue()
        self.pool.init_workers(SlowResultWriter().work_loop, result_queue)

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for i, w in enumerate(self.pool.workers):
            w.put('Hello, Worker {}'.format(i))
        assert len(self.pool) == 2

        # wait for the subprocesses to start working on their tasks and be marked busy
        time.sleep(1)
        assert self.pool.should_grow

        # write a third message, expect a new worker to spawn because all
        # workers are busy
        self.pool.write(0, 'Hello, Worker {}'.format(2))
        assert len(self.pool) == 3

    def test_scale_down(self):
        self.pool.init_workers(ResultWriter().work_loop,
                               multiprocessing.Queue())

        # start with two workers, and scale up to 10 workers
        assert len(self.pool) == 2
        for i in range(8):
            self.pool.up()
        assert len(self.pool) == 10

        # cleanup should scale back down to the 2 minimum workers
        with mock.patch('awx.main.dispatch.reaper.reap') as reap:
            self.pool.cleanup()
        reap.assert_called()
        assert len(self.pool) == 2

    def test_max_scale_up(self):
        self.pool.init_workers(ResultWriter().work_loop,
                               multiprocessing.Queue())

        assert len(self.pool) == 2
        for i in range(25):
            self.pool.up()
        assert self.pool.max_workers == 10
        assert self.pool.full is True
        assert len(self.pool) == 10

    def test_equal_worker_distribution(self):
        # if all workers are busy, spawn new workers *before* adding messages
        # to an existing queue
        self.pool.init_workers(SlowResultWriter().work_loop,
                               multiprocessing.Queue())

        # start with two workers, write an event to each worker and make it busy
        assert len(self.pool) == 2
        for i in range(10):
            self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.busy
            assert len(w.managed_tasks) == 1

        # the queue is full at 10, the _next_ write should put the message into
        # a worker's backlog
        assert len(self.pool) == 10
        for w in self.pool.workers:
            assert w.messages_sent == 1
        self.pool.write(0, 'Hello, World!')
        assert len(self.pool) == 10
        assert self.pool.workers[0].messages_sent == 2

    def test_lost_worker_autoscale(self):
        # if a worker exits, it should be replaced automatically up to min_workers
        self.pool.init_workers(ResultWriter().work_loop,
                               multiprocessing.Queue())

        # start with two workers, kill one of them
        assert len(self.pool) == 2
        assert not self.pool.should_grow
        alive_pid = self.pool.workers[1].pid
        self.pool.workers[0].process.terminate()
        time.sleep(1)  # wait a moment for sigterm

        # clean up and reap the dead worker
        with mock.patch('awx.main.dispatch.reaper.reap') as reap:
            self.pool.cleanup()
        reap.assert_called()
        assert len(self.pool) == 1
        assert self.pool.workers[0].pid == alive_pid

        # the next queue write should replace the lost worker
        self.pool.write(0, 'Hello, Worker')
        assert len(self.pool) == 2
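
Read together, these tests pin down the AutoscalePool contract: the pool starts at min_workers, grows on demand up to max_workers, backlogs writes once full, and cleanup() reaps dead or surplus idle workers back toward the minimum. A condensed usage sketch assembled from the calls exercised above (the import path is an assumption inferred from the mock.patch target, and worker_loop is a hypothetical stand-in for ResultWriter().work_loop):

    import multiprocessing
    import signal

    from awx.main.dispatch.pool import AutoscalePool  # assumed module path

    def worker_loop(queue):
        # Hypothetical per-worker loop; the tests pass
        # ResultWriter().work_loop here instead.
        for message in iter(queue.get, None):
            pass

    pool = AutoscalePool(min_workers=2, max_workers=10)
    pool.init_workers(worker_loop, multiprocessing.Queue())

    pool.write(0, 'Hello, World!')  # queue work; grows the pool if all workers are busy
    pool.cleanup()                  # reap dead workers, scale idle ones back down
    pool.stop(signal.SIGTERM)       # terminate every worker process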