Beispiel #1
0
def get_node(fqdn):
    """Look up the 'node' entry for ``fqdn`` via ``etcd.get``.

    The lookup result is handed back to the caller unchanged.
    """
    node_record = etcd.get('node', None, fqdn)
    return node_record
Beispiel #2
0
def get_metadata(object_type, name):
    """Query ``etcd.get`` for a 'metadata' entry.

    ``object_type`` and ``name`` are forwarded as the second and third
    arguments of the lookup; its result is returned unchanged.
    """
    metadata_record = etcd.get('metadata', object_type, name)
    return metadata_record
Beispiel #3
0
def get_node_vxid_mapping(node):
    """Look up the 'vxid_mapping' entry for ``node`` via ``etcd.get``.

    Returns:
        Whatever ``etcd.get`` yields for the lookup. The result used to be
        discarded (the function always returned None), which made this
        getter useless to callers.
    """
    return etcd.get('vxid_mapping', None, node)
Beispiel #4
0
def get_metrics(fqdn):
    """Return the 'metrics' value stored in the metrics record for ``fqdn``.

    An empty dict is returned when ``etcd.get`` yields a falsy record or
    when the record has no 'metrics' key.
    """
    record = etcd.get('metrics', fqdn, None)
    return record.get('metrics', {}) if record else {}
Beispiel #5
0
def get_namespace(namespace):
    """Look up the 'namespace' entry named ``namespace`` via ``etcd.get``.

    The lookup result is returned unchanged.
    """
    namespace_record = etcd.get('namespace', None, namespace)
    return namespace_record
Beispiel #6
0
def get_ipmanager(network_uuid):
    """Load the IP manager stored for ``network_uuid``.

    Returns:
        The result of ``ipmanager.from_db`` applied to the stored record.

    Raises:
        Exception: when ``etcd.get`` yields no (or a falsy) record for the
            network.
    """
    stored = etcd.get('ipmanager', None, network_uuid)
    if stored:
        return ipmanager.from_db(stored)
    raise Exception('IP Manager not found for network %s' % network_uuid)
Beispiel #7
0
def get_interface(interface_uuid):
    """Look up the 'networkinterface' entry for ``interface_uuid``.

    The result of ``etcd.get`` is returned unchanged.
    """
    interface_record = etcd.get('networkinterface', None, interface_uuid)
    return interface_record
Beispiel #8
0
def get_image_metadata(url_hash, node=None):
    """Query ``etcd.get`` for an 'image' entry.

    ``url_hash`` and ``node`` (None unless the caller supplies one) are
    forwarded as the second and third lookup arguments; the result is
    returned unchanged.
    """
    image_record = etcd.get('image', url_hash, node)
    return image_record
Beispiel #9
0
def handle(jobname, workitem):
    """Run every task in a queued work item, then resolve the job.

    Tasks are read from ``workitem.get('tasks', [])`` and dispatched on
    their type. Exceptions raised while processing are logged and, when an
    instance is associated with the work item, that instance is queued for
    deletion through ``enqueue_delete_due_error``. The job is always
    resolved against this node in the ``finally`` clause.

    Args:
        jobname: Identifier of the work item; used in log fields, the
            process title, event messages and ``etcd.resolve``.
        workitem: Mapping with an optional 'tasks' list. Every entry must
            be a ``QueueTask`` instance.

    Returns:
        None. Returns early, after re-queueing the whole work item, when
        the preflight step redirects the instance elsewhere.
    """
    # Acquired outside the try block because the except clause below needs
    # ``libvirt.libvirtError`` to be resolvable.
    libvirt = util_libvirt.get_libvirt()

    log = LOG.with_field('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    # ``inst`` deliberately outlives the loop: the exception handlers and
    # the finally clause act on the last instance loaded for this work item.
    inst = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not isinstance(task, QueueTask):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if isinstance(task, InstanceTask):
                inst = instance.Instance.from_db(task.instance_uuid())
                if not inst:
                    raise exceptions.InstanceNotInDBException(
                        task.instance_uuid())

            if isinstance(task, FetchImageTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if isinstance(task, SnapshotTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if inst:
                log_i = log.with_instance(inst)
            else:
                log_i = log

            log_i.with_field('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if inst:
                # TODO(andy) move to QueueTask
                db.add_event('instance', inst.uuid, task.pretty_task_name(),
                             'dequeued', None, 'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), inst)

            elif isinstance(task, PreflightInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED
                        or inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot preflight an instance in state %s, skipping task'
                        % inst.state.value)
                    # Note: ``continue`` also skips the 'Task complete' log.
                    continue

                redirect_to = instance_preflight(inst, task.network())
                if redirect_to:
                    log_i.info('Redirecting instance start to %s' %
                               redirect_to)
                    # Hand the entire work item on; remaining tasks are not
                    # processed here. The finally clause still resolves
                    # this job.
                    etcd.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED
                        or inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot start an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                instance_start(inst, task.network())
                etcd.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                # Delete failures are logged and ignored so they do not
                # reach the outer handlers (which would queue another
                # delete for the same instance).
                try:
                    instance_delete(inst)
                    etcd.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util_general.ignore_exception(
                        'instance %s delete task' % inst, e)

            elif isinstance(task, FloatNetworkInterfaceTask):
                # Just punt it to the network node now that the interface is ready
                etcd.enqueue('networknode', task)

            elif isinstance(task, SnapshotTask):
                snapshot(inst, task.disk(), task.artifact_uuid(),
                         task.blob_uuid())

            elif isinstance(task, DeleteNetworkWhenClean):
                # Check if any interfaces remain on network
                task_network = net.Network.from_db(task.network_uuid())
                ifaces = networkinterface.interfaces_for_network(task_network)
                cur_interfaces = {i.uuid: i for i in ifaces}

                # NOTE(review): this fires whenever *any* interface is
                # present, including the ones being waited on, although the
                # message speaks of "new" interfaces - confirm intent.
                if cur_interfaces:
                    LOG.with_network(task_network).error(
                        'During DeleteNetworkWhenClean new interfaces have '
                        'connected to network: %s', cur_interfaces)

                # Only check those present at delete task initiation time.
                remain_interfaces = list(
                    set(task.wait_interfaces()) & set(cur_interfaces))
                if remain_interfaces:
                    # Queue task on a node with a remaining instance
                    first_iface = cur_interfaces[remain_interfaces[0]]
                    # Keep this lookup in its own name. Rebinding ``inst``
                    # here would make the error handlers and the finally
                    # clause below act on an instance that merely has an
                    # interface on the network (e.g. queue it for deletion
                    # if the enqueue fails).
                    remaining_inst = instance.Instance.from_db(
                        first_iface.instance_uuid)
                    etcd.enqueue(
                        remaining_inst.placement['node'],
                        {
                            'tasks': [
                                DeleteNetworkWhenClean(task.network_uuid(),
                                                       remain_interfaces)
                            ]
                        },
                        delay=60)

                else:
                    # All original instances deleted, safe to delete network
                    etcd.enqueue('networknode',
                                 DestroyNetworkTask(task.network_uuid()))

            elif isinstance(task, HypervisorDestroyNetworkTask):
                n = net.Network.from_db(task.network_uuid())
                n.delete_on_hypervisor()

            elif isinstance(task, FetchBlobTask):
                # Read this node's metrics; a missing record is treated as
                # an empty metrics dict.
                metrics = etcd.get('metrics', config.NODE_NAME, None)
                if metrics:
                    metrics = metrics.get('metrics', {})
                else:
                    metrics = {}

                b = blob.Blob.from_db(task.blob_uuid())
                if not b:
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, not found')

                # Refuse the transfer if it would leave less than
                # config.MINIMUM_FREE_DISK according to 'disk_free_blobs'.
                elif (int(metrics.get('disk_free_blobs', 0)) - int(b.size) <
                      config.MINIMUM_FREE_DISK):
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, insufficient space')

                else:
                    log.with_object(b).info('Replicating blob')
                    size = b.ensure_local([])
                    log.with_object(b).with_fields({
                        'transferred': size,
                        'expected': b.size
                    }).info('Replicating blob complete')

            else:
                log_i.with_field('task',
                                 task).error('Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by external issue and not an application error
        log.info('Fetch Image Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image fetch failed: %s' % e)

    except exceptions.ImagesCannotShrinkException as e:
        log.info('Fetch Resize Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image resize failed: %s' % e)

    except libvirt.libvirtError as e:
        log.info('Libvirt Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except exceptions.InstanceException as e:
        log.info('Instance Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except Exception as e:
        # Logging ignored exception - this should be investigated
        util_general.ignore_exception('queue worker', e)
        if inst:
            inst.enqueue_delete_due_error('Failed queue task: %s' % e)

    finally:
        # Always resolve the job, whether the tasks succeeded, failed or
        # were redirected.
        etcd.resolve(config.NODE_NAME, jobname)
        if inst:
            inst.add_event('tasks complete',
                           'dequeued',
                           msg='Work item %s' % jobname)
        log.info('Completed workitem')
Beispiel #10
0
def get_instance(instance_uuid):
    """Look up the 'instance' entry for ``instance_uuid`` via ``etcd.get``.

    The lookup result is returned unchanged.
    """
    instance_record = etcd.get('instance', None, instance_uuid)
    return instance_record
Beispiel #11
0
def get_network(network_uuid):
    """Look up the 'network' entry for ``network_uuid`` via ``etcd.get``.

    The lookup result is returned unchanged.
    """
    network_record = etcd.get('network', None, network_uuid)
    return network_record
Beispiel #12
0
def get_network(network_uuid):
    """Look up the 'network' entry for ``network_uuid`` via ``etcd.get``.

    ``see_this_node()`` is invoked before the lookup; the lookup result is
    returned unchanged.
    """
    see_this_node()
    network_record = etcd.get('network', None, network_uuid)
    return network_record
Beispiel #13
0
def get_metrics(fqdn):
    """Return the 'metrics' value stored in the metrics record for ``fqdn``.

    ``see_this_node()`` is invoked before the lookup.

    Returns:
        The record's 'metrics' value, or an empty dict when ``etcd.get``
        yields no record or the record lacks a 'metrics' key. Previously
        both situations raised (TypeError / KeyError) instead.
    """
    see_this_node()
    d = etcd.get('metrics', fqdn, None)
    if not d:
        # No metrics have been recorded for this fqdn.
        return {}
    return d.get('metrics', {})
Beispiel #14
0
def get_node(fqdn):
    """Look up the 'node' entry for ``fqdn`` via ``etcd.get``.

    ``see_this_node()`` is invoked before the lookup; the lookup result is
    returned unchanged.
    """
    see_this_node()
    node_record = etcd.get('node', None, fqdn)
    return node_record
Beispiel #15
0
def get_interface(interface_uuid):
    """Look up the 'networkinterface' entry for ``interface_uuid``.

    ``see_this_node()`` is invoked before the lookup; the result of
    ``etcd.get`` is returned unchanged.
    """
    see_this_node()
    interface_record = etcd.get('networkinterface', None, interface_uuid)
    return interface_record
Beispiel #16
0
def get_instance(instance_uuid):
    """Look up the 'instance' entry for ``instance_uuid`` via ``etcd.get``.

    ``see_this_node()`` is invoked before the lookup; the lookup result is
    returned unchanged.
    """
    see_this_node()
    instance_record = etcd.get('instance', None, instance_uuid)
    return instance_record