Example #1
    def _process_network_node_workitems(self):
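        # Pull one work item off the 'networknode' queue, check it decoded to a
        # NetworkTask, and dispatch it based on its concrete type.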
        jobname, workitem = db.dequeue('networknode')
        try:
            if not workitem:
                time.sleep(0.2)
                return

            log_ctx = LOG.withField('workitem', workitem)
            if not NetworkTask.__subclasscheck__(type(workitem)):
                raise exceptions.UnknownTaskException(
                    'Network workitem was not decoded: %s' % workitem)

            n = net.from_db(workitem.network_uuid())
            if not n:
                log_ctx.withNetwork(workitem.network_uuid()).warning(
                    'Received work item for non-existent network')
                return

            # NOTE(mikal): there's really nothing stopping us from processing a bunch
            # of these jobs in parallel with a pool of workers, but I am not sure it's
            # worth the complexity right now. Are we really going to be changing
            # networks that much?
            if isinstance(workitem, DeployNetworkTask):
                try:
                    n.create()
                    n.ensure_mesh()
                    db.add_event('network', workitem.network_uuid(),
                                 'network node', 'deploy', None, None)
                except exceptions.DeadNetwork as e:
                    log_ctx.withField('exception', e).warning(
                        'DeployNetworkTask on dead network')

            elif isinstance(workitem, UpdateDHCPNetworkTask):
                try:
                    n.create()
                    n.ensure_mesh()
                    n.update_dhcp()
                    db.add_event('network', workitem.network_uuid(),
                                 'network node', 'update dhcp', None, None)
                except exceptions.DeadNetwork as e:
                    log_ctx.withField('exception', e).warning(
                        'UpdateDHCPNetworkTask on dead network')

            elif isinstance(workitem, RemoveDHCPNetworkTask):
                n.remove_dhcp()
                db.add_event('network', workitem.network_uuid(),
                             'network node', 'remove dhcp', None, None)

        finally:
            if jobname:
                db.resolve('networknode', jobname)
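
The loop above consumes a single NetworkTask object per work item from the 'networknode' queue. A minimal sketch of the producer side, assuming DeployNetworkTask can be constructed from just the network UUID (its constructor is not shown in this example):

    # Hypothetical producer for the loop above. DeployNetworkTask's
    # constructor arguments are an assumption inferred from the
    # workitem.network_uuid() accessor; the queue name matches the
    # db.dequeue('networknode') call in the worker.
    def request_network_deploy(network_uuid):
        db.enqueue('networknode', DeployNetworkTask(network_uuid))
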
Example #2
    def run(self):
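        # Gather node resource statistics, export them as Prometheus gauges and
        # write them to the database in bulk; refresh on demand via the per-node
        # metrics queue and otherwise on the SCHEDULER_CACHE_TIMEOUT interval.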
        logutil.info(None, 'Starting')
        gauges = {
            'updated_at': Gauge('updated_at', 'The last time metrics were updated')
        }

        last_metrics = 0

        def update_metrics():
            # last_metrics is a local of run(), not a module global, so
            # nonlocal is the right declaration here.
            nonlocal last_metrics

            stats = _get_stats()
            for metric in stats:
                if metric not in gauges:
                    gauges[metric] = Gauge(metric, '')
                gauges[metric].set(stats[metric])

            db.update_metrics_bulk(stats)
            logutil.info(None, 'Updated metrics')
            gauges['updated_at'].set_to_current_time()

        while True:
            try:
                jobname, _ = db.dequeue(
                    '%s-metrics' % config.parsed.get('NODE_NAME'))
                if jobname:
                    if time.time() - last_metrics > 2:
                        update_metrics()
                        last_metrics = time.time()
                    db.resolve('%s-metrics' % config.parsed.get('NODE_NAME'),
                               jobname)
                else:
                    time.sleep(0.2)

                if time.time() - last_metrics > config.parsed.get('SCHEDULER_CACHE_TIMEOUT'):
                    update_metrics()
                    last_metrics = time.time()

            except Exception as e:
                util.ignore_exception('resource statistics', e)
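
The Gauge objects above appear to follow the prometheus_client API (Gauge(name, description), set(), set_to_current_time()). For the gauges to be scraped, an HTTP metrics endpoint has to be exposed somewhere; a minimal sketch using prometheus_client, with an illustrative port:

    # Minimal sketch: expose gauges over HTTP so Prometheus can scrape them.
    # The port number is illustrative only; this daemon's real exporter
    # setup is not shown in the example.
    from prometheus_client import Gauge, start_http_server

    start_http_server(8000)
    updated_at = Gauge('updated_at', 'The last time metrics were updated')
    updated_at.set_to_current_time()
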
Example #3
def handle(jobname, workitem):
    log = LOG.withField('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    instance_uuid = None
    task = None
    try:
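        # A work item carries an ordered list of QueueTask instances; run them
        # in sequence, recording per-instance events as we go.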
        for task in workitem.get('tasks', []):
            if not QueueTask.__subclasscheck__(type(task)):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if (InstanceTask.__subclasscheck__(type(task))
                    or isinstance(task, FetchImageTask)):
                instance_uuid = task.instance_uuid()

            if instance_uuid:
                log_i = log.withInstance(instance_uuid)
            else:
                log_i = log

            log_i.withField('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if instance_uuid:
                # TODO(andy) move to QueueTask
                db.add_event('instance', instance_uuid,
                             task.pretty_task_name(), 'dequeued', None,
                             'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), instance_uuid)

            elif isinstance(task, PreflightInstanceTask):
                redirect_to = instance_preflight(instance_uuid, task.network())
                if redirect_to:
                    log_i.info('Redirecting instance start to %s' %
                               redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                instance_start(instance_uuid, task.network())
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'deleted')
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            elif isinstance(task, ErrorInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'error')

                    if task.error_msg():
                        db.update_instance_error_message(
                            instance_uuid, task.error_msg())
                    db.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            else:
                log_i.withField('task', task).error('Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by an external issue rather than an application error
        log.info('Fetch Image Error: %s' % e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    except Exception as e:
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    finally:
        db.resolve(config.NODE_NAME, jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        log.info('Completed workitem')
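
handle() expects the work item to be a dict with a 'tasks' list of QueueTask subclasses. A sketch of the enqueuing side, with the task constructor signatures inferred as an assumption from the accessors the handler calls (url(), instance_uuid(), network()):

    # Hypothetical producer for handle(); the task constructor signatures
    # are assumptions inferred from the accessor methods used above.
    def enqueue_instance_start(node, instance_uuid, network, image_url):
        db.enqueue(node, {
            'tasks': [
                FetchImageTask(image_url, instance_uuid),
                PreflightInstanceTask(instance_uuid, network),
                StartInstanceTask(instance_uuid, network),
            ],
        })
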
Example #4
def handle(jobname, workitem):
    j = JobName(jobname)
    logutil.info([j], 'Processing workitem')
    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    instance_uuid = None
    task = None
    try:
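        # Here each task is a plain dict keyed by 'type' rather than a
        # QueueTask object, so its fields are read with task.get().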
        for task in workitem.get('tasks', []):
            ro = [j]
            instance_uuid = task.get('instance_uuid')
            if instance_uuid:
                i = virt.from_db(instance_uuid)
                ro.append(i)

            if (task.get('type', '').startswith('instance_')
                    and not instance_uuid):
                logutil.error(ro, 'Instance task lacks instance uuid')
                return

            if instance_uuid:
                db.add_event('instance', instance_uuid,
                             task.get('type', '').replace('_', ' '), 'dequeued',
                             None, 'Work item %s' % jobname)

            logutil.info(
                ro,
                'Executing task %s: %s' % (task.get('type', 'unknown'), task))
            if task.get('type') == 'image_fetch':
                image_fetch(task.get('url'), instance_uuid)

            if task.get('type') == 'instance_preflight':
                redirect_to = instance_preflight(instance_uuid,
                                                 task.get('network'))
                if redirect_to:
                    logutil.info(
                        ro, 'Redirecting instance start to %s' % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            if task.get('type') == 'instance_start':
                instance_start(instance_uuid, task.get('network'))
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'), {})

            if task.get('type') == 'instance_delete':
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid,
                                             task.get('next_state', 'unknown'))
                    if task.get('next_state_message'):
                        db.update_instance_error_message(
                            instance_uuid, task.get('next_state_message'))
                    db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'),
                               {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

    except Exception as e:
        if instance_uuid:
            util.ignore_exception(daemon.process_name('queues'), e)
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'failed queue task: %s' % e)

    finally:
        db.resolve(config.parsed.get('NODE_NAME'), jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        logutil.info([j], 'Completed workitem')
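
In this dict-based form the same work item would be expressed with plain dicts; the keys mirror the task.get() calls in the handler ('type', 'url', 'instance_uuid', 'network', 'next_state', 'next_state_message'). An illustrative sketch:

    # Illustrative only: a work item shaped the way this handler reads it.
    def example_workitem(instance_uuid, network, image_url):
        return {
            'tasks': [
                {'type': 'image_fetch', 'url': image_url,
                 'instance_uuid': instance_uuid},
                {'type': 'instance_preflight', 'instance_uuid': instance_uuid,
                 'network': network},
                {'type': 'instance_start', 'instance_uuid': instance_uuid,
                 'network': network},
            ],
        }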