def get_node(fqdn):
    """Look up the 'node' entry stored in etcd for ``fqdn``.

    The value produced by ``etcd.get('node', None, fqdn)`` is returned
    unchanged.
    """
    node_record = etcd.get('node', None, fqdn)
    return node_record
def get_metadata(object_type, name):
    """Look up the 'metadata' entry for ``name`` under ``object_type``.

    The value produced by ``etcd.get`` is returned unchanged.
    """
    stored_metadata = etcd.get('metadata', object_type, name)
    return stored_metadata
def get_node_vxid_mapping(node):
    """Return the 'vxid_mapping' entry stored in etcd for ``node``.

    Previously the lookup result was discarded and the function always
    returned None; the value from ``etcd.get`` is now handed back to the
    caller, matching the other getters in this module.
    """
    return etcd.get('vxid_mapping', None, node)
def get_metrics(fqdn):
    """Return the 'metrics' dict recorded in etcd for node ``fqdn``.

    An empty dict is returned when the etcd entry is missing or falsy, or
    when the entry has no 'metrics' key.
    """
    record = etcd.get('metrics', fqdn, None)
    return record.get('metrics', {}) if record else {}
def get_namespace(namespace):
    """Look up the 'namespace' entry stored in etcd for ``namespace``.

    The value produced by ``etcd.get`` is returned unchanged.
    """
    namespace_record = etcd.get('namespace', None, namespace)
    return namespace_record
def get_ipmanager(network_uuid):
    """Load the IP manager stored in etcd for ``network_uuid``.

    Raises:
        Exception: if the 'ipmanager' entry is missing or falsy.

    Returns:
        The result of ``ipmanager.from_db`` applied to the stored entry.
    """
    stored = etcd.get('ipmanager', None, network_uuid)
    if stored:
        return ipmanager.from_db(stored)
    raise Exception('IP Manager not found for network %s' % network_uuid)
def get_interface(interface_uuid):
    """Look up the 'networkinterface' entry for ``interface_uuid`` in etcd.

    The value produced by ``etcd.get`` is returned unchanged.
    """
    interface_record = etcd.get('networkinterface', None, interface_uuid)
    return interface_record
def get_image_metadata(url_hash, node=None):
    """Look up the 'image' entry for ``url_hash`` (and ``node``) in etcd.

    ``node`` defaults to None and is passed straight through as the final
    argument of ``etcd.get``; the lookup result is returned unchanged.
    """
    image_record = etcd.get('image', url_hash, node)
    return image_record
def handle(jobname, workitem):
    """Run every task in a dequeued work item, then resolve the job.

    Args:
        jobname: identifier of the job. Used for logging, the process
            title, event messages, and to resolve the job in etcd.
        workitem: mapping whose optional 'tasks' entry is a list of
            QueueTask objects, processed in order.

    Tasks are dispatched on their type. A preflight task that returns a
    redirect target re-enqueues the whole work item on that target and
    stops processing here. Exceptions derived from ``Exception`` are
    caught and logged; if an instance is associated with the work item at
    that point it is queued for deletion via ``enqueue_delete_due_error``.
    The job is always resolved against ``config.NODE_NAME`` on exit.
    """
    # The libvirt module is obtained through the helper; it is needed for
    # the ``libvirt.libvirtError`` handler below.
    libvirt = util_libvirt.get_libvirt()

    log = LOG.with_field('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('queues'), jobname))

    # ``inst`` is the instance the work item's tasks operate on. It is
    # deliberately kept across loop iterations and is consulted by the
    # exception handlers and the ``finally`` clause.
    inst = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not isinstance(task, QueueTask):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if isinstance(task, InstanceTask):
                inst = instance.Instance.from_db(task.instance_uuid())
                if not inst:
                    raise exceptions.InstanceNotInDBException(
                        task.instance_uuid())

            # For these two task types a missing instance is tolerated
            # here (no exception is raised if the lookup returns nothing).
            if isinstance(task, FetchImageTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if isinstance(task, SnapshotTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if inst:
                log_i = log.with_instance(inst)
            else:
                log_i = log

            log_i.with_field('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if inst:
                # TODO(andy) move to QueueTask
                db.add_event('instance', inst.uuid, task.pretty_task_name(),
                             'dequeued', None, 'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), inst)

            elif isinstance(task, PreflightInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED or
                        inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot preflight an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                redirect_to = instance_preflight(inst, task.network())
                if redirect_to:
                    log_i.info('Redirecting instance start to %s'
                               % redirect_to)
                    # Hand the entire work item to the other queue; the
                    # ``finally`` clause still resolves the job locally.
                    etcd.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED or
                        inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot start an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                instance_start(inst, task.network())
                etcd.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                # Deletion is best effort: failures are logged and ignored
                # rather than escalated to the outer handlers.
                try:
                    instance_delete(inst)
                    etcd.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util_general.ignore_exception(
                        'instance %s delete task' % inst, e)

            elif isinstance(task, FloatNetworkInterfaceTask):
                # Just punt it to the network node now that the interface is ready
                etcd.enqueue('networknode', task)

            elif isinstance(task, SnapshotTask):
                snapshot(inst, task.disk(),
                         task.artifact_uuid(), task.blob_uuid())

            elif isinstance(task, DeleteNetworkWhenClean):
                # Check if any interfaces remain on network
                task_network = net.Network.from_db(task.network_uuid())
                ifaces = networkinterface.interfaces_for_network(task_network)
                cur_interfaces = {i.uuid: i for i in ifaces}

                if cur_interfaces:
                    LOG.with_network(task_network).error(
                        'During DeleteNetworkWhenClean new interfaces have '
                        'connected to network: %s', cur_interfaces)

                # Only check those present at delete task initiation time.
                remain_interfaces = list(set(task.wait_interfaces()) &
                                         set(cur_interfaces))
                if remain_interfaces:
                    # Queue task on a node with a remaining instance.
                    # A dedicated local is used instead of ``inst`` so the
                    # exception handlers and the ``finally`` clause never
                    # act on (or queue deletion of) an instance that merely
                    # still has an interface on this network.
                    first_iface = cur_interfaces[remain_interfaces[0]]
                    remaining_inst = instance.Instance.from_db(
                        first_iface.instance_uuid)
                    etcd.enqueue(remaining_inst.placement['node'],
                                 {'tasks': [
                                     DeleteNetworkWhenClean(
                                         task.network_uuid(),
                                         remain_interfaces)
                                 ]},
                                 delay=60)

                else:
                    # All original instances deleted, safe to delete network
                    etcd.enqueue('networknode',
                                 DestroyNetworkTask(task.network_uuid()))

            elif isinstance(task, HypervisorDestroyNetworkTask):
                n = net.Network.from_db(task.network_uuid())
                n.delete_on_hypervisor()

            elif isinstance(task, FetchBlobTask):
                # Read this node's metrics to learn how much blob disk
                # space is free before replicating.
                metrics = etcd.get('metrics', config.NODE_NAME, None)
                if metrics:
                    metrics = metrics.get('metrics', {})
                else:
                    metrics = {}

                b = blob.Blob.from_db(task.blob_uuid())
                if not b:
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, not found')

                elif (int(metrics.get('disk_free_blobs', 0)) - int(b.size) <
                      config.MINIMUM_FREE_DISK):
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, insufficient space')

                else:
                    log.with_object(b).info('Replicating blob')
                    size = b.ensure_local([])
                    log.with_object(b).with_fields({
                        'transferred': size,
                        'expected': b.size
                    }).info('Replicating blob complete')

            else:
                log_i.with_field('task', task).error(
                    'Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by external issue and not an application error
        log.info('Fetch Image Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image fetch failed: %s' % e)

    except exceptions.ImagesCannotShrinkException as e:
        log.info('Fetch Resize Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image resize failed: %s' % e)

    except libvirt.libvirtError as e:
        log.info('Libvirt Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except exceptions.InstanceException as e:
        log.info('Instance Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except Exception as e:
        # Logging ignored exception - this should be investigated
        util_general.ignore_exception('queue worker', e)
        if inst:
            inst.enqueue_delete_due_error('Failed queue task: %s' % e)

    finally:
        # Always mark the job as resolved, whether the tasks succeeded,
        # failed, or were redirected to another queue.
        etcd.resolve(config.NODE_NAME, jobname)
        if inst:
            inst.add_event('tasks complete', 'dequeued',
                           msg='Work item %s' % jobname)
        log.info('Completed workitem')
def get_instance(instance_uuid):
    """Look up the 'instance' entry stored in etcd for ``instance_uuid``.

    The value produced by ``etcd.get`` is returned unchanged.
    """
    instance_record = etcd.get('instance', None, instance_uuid)
    return instance_record
def get_network(network_uuid):
    """Look up the 'network' entry stored in etcd for ``network_uuid``.

    The value produced by ``etcd.get`` is returned unchanged.
    """
    network_record = etcd.get('network', None, network_uuid)
    return network_record
def get_network(network_uuid):
    """Look up the 'network' entry stored in etcd for ``network_uuid``.

    ``see_this_node()`` is invoked before the lookup; the value produced
    by ``etcd.get`` is returned unchanged.
    """
    see_this_node()
    network_record = etcd.get('network', None, network_uuid)
    return network_record
def get_metrics(fqdn):
    """Return the 'metrics' dict recorded in etcd for node ``fqdn``.

    ``see_this_node()`` is invoked before the lookup. When the etcd entry
    is missing or falsy, or has no 'metrics' key, an empty dict is
    returned instead of raising TypeError/KeyError.
    """
    see_this_node()
    d = etcd.get('metrics', fqdn, None)
    if not d:
        # No metrics have been recorded for this node.
        return {}
    return d.get('metrics', {})
def get_node(fqdn):
    """Look up the 'node' entry stored in etcd for ``fqdn``.

    ``see_this_node()`` is invoked before the lookup; the value produced
    by ``etcd.get`` is returned unchanged.
    """
    see_this_node()
    node_record = etcd.get('node', None, fqdn)
    return node_record
def get_interface(interface_uuid):
    """Look up the 'networkinterface' entry for ``interface_uuid`` in etcd.

    ``see_this_node()`` is invoked before the lookup; the value produced
    by ``etcd.get`` is returned unchanged.
    """
    see_this_node()
    interface_record = etcd.get('networkinterface', None, interface_uuid)
    return interface_record
def get_instance(instance_uuid):
    """Look up the 'instance' entry stored in etcd for ``instance_uuid``.

    ``see_this_node()`` is invoked before the lookup; the value produced
    by ``etcd.get`` is returned unchanged.
    """
    see_this_node()
    instance_record = etcd.get('instance', None, instance_uuid)
    return instance_record