def post(self, url=None):
    db.add_event('image', url, 'api', 'cache', None, None)

    db.enqueue(config.parsed.get('NODE_NAME'), {
        'tasks': [{
            'type': 'image_fetch',
            'url': url
        }],
    })
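# A minimal client-side sketch of exercising the image cache endpoint above,
# not part of the original code. The API base URL, the /images path and the
# Bearer-token auth are assumptions about how this resource is mounted;
# adjust them to your deployment.
import requests

API = 'http://sf-1:13000'     # hypothetical API endpoint
TOKEN = 'example-jwt'         # hypothetical JWT obtained from the auth API

resp = requests.post(
    '%s/images' % API,
    headers={'Authorization': 'Bearer %s' % TOKEN},
    json={'url': 'https://example.com/images/example.qcow2'})
resp.raise_for_status()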
def delete(self, confirm=False, namespace=None):
    """Delete all instances in the namespace."""

    if confirm is not True:
        return error(400, 'parameter confirm is not set true')

    if get_jwt_identity() == 'system':
        if not isinstance(namespace, str):
            # A client using a system key must specify the namespace. This
            # ensures that deleting all instances in the cluster (by
            # specifying namespace='system') is a deliberate act.
            return error(400, 'system user must specify parameter namespace')

    else:
        if namespace and namespace != get_jwt_identity():
            return error(401, 'you cannot delete other namespaces')
        namespace = get_jwt_identity()

    instances_del = []
    tasks_by_node = {}
    for instance in list(db.get_instances(all=True, namespace=namespace)):
        if instance['state'] in ['deleted', 'error']:
            continue

        # If this instance is not on a node, just do the DB cleanup locally
        if not instance['node']:
            node = config.parsed.get('NODE_NAME')
        else:
            node = instance['node']

        tasks_by_node.setdefault(node, [])
        tasks_by_node[node].append({
            'type': 'instance_delete',
            'instance_uuid': instance['uuid'],
            'next_state': 'deleted',
            'next_state_message': None
        })
        instances_del.append(instance['uuid'])

    for node in tasks_by_node:
        db.enqueue(node, {'tasks': tasks_by_node[node]})

    waiting_for = copy.copy(instances_del)
    start_time = time.time()
    while (waiting_for and
           (time.time() - start_time < config.parsed.get('API_ASYNC_WAIT'))):
        for instance_uuid in copy.copy(waiting_for):
            i = db.get_instance(instance_uuid)
            if i['state'] in ['deleted', 'error']:
                waiting_for.remove(instance_uuid)

    return instances_del
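# Illustrative only, not part of the original code: the shape of the work
# item each node receives from the per-node fan-out above, assuming two
# instances placed on the same node. The field names come directly from the
# tasks_by_node construction; the uuids are placeholders.
example_delete_work_item = {
    'tasks': [
        {
            'type': 'instance_delete',
            'instance_uuid': '11111111-1111-1111-1111-111111111111',
            'next_state': 'deleted',
            'next_state_message': None,
        },
        {
            'type': 'instance_delete',
            'instance_uuid': '22222222-2222-2222-2222-222222222222',
            'next_state': 'deleted',
            'next_state_message': None,
        },
    ],
}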
def remove_dhcp(self):
    if util.is_network_node():
        subst = self.subst_dict()
        with util.RecordedOperation('remove dhcp', self):
            with db.get_object_lock(self, ttl=120, op='Network remove DHCP'):
                d = dhcp.DHCP(self, subst['vx_veth_inner'])
                d.remove_dhcpd()
    else:
        db.enqueue('networknode',
                   RemoveDHCPNetworkTask(self.db_entry['uuid']))
        db.add_event('network', self.db_entry['uuid'],
                     'remove dhcp', 'enqueued', None, None)
def remove_dhcp(self):
    if util.is_network_node():
        subst = self.subst_dict()
        with util.RecordedOperation('remove dhcp', self):
            with db.get_lock('network', None, self.uuid, ttl=120):
                d = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
                d.remove_dhcpd()
    else:
        db.enqueue('networknode', {
            'type': 'remove_dhcp',
            'network_uuid': self.uuid
        })
        db.add_event('network', self.uuid,
                     'remove dhcp', 'enqueued', None, None)
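# Hypothetical sketch only: the first remove_dhcp variant enqueues a
# RemoveDHCPNetworkTask object, whose definition is not shown in this
# section. Something like the following is what that version relies on --
# a task that carries its type name and the network uuid instead of a raw
# dict. The real class hierarchy may differ.
class QueueTask:
    """Base class for work items placed on a node queue."""
    _name = None

    def name(self):
        return self._name


class NetworkTask(QueueTask):
    def __init__(self, network_uuid):
        self._network_uuid = network_uuid

    def network_uuid(self):
        return self._network_uuid


class RemoveDHCPNetworkTask(NetworkTask):
    _name = 'remove_dhcp'


class UpdateDHCPNetworkTask(NetworkTask):
    _name = 'update_dhcp'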
def update_dhcp(self):
    if not self.db_entry['provide_dhcp']:
        return

    if util.is_network_node():
        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self):
            with db.get_object_lock(self, ttl=120, op='Network update DHCP'):
                d = dhcp.DHCP(self, subst['vx_veth_inner'])
                d.restart_dhcpd()
    else:
        db.enqueue('networknode',
                   UpdateDHCPNetworkTask(self.db_entry['uuid']))
        db.add_event('network', self.db_entry['uuid'],
                     'update dhcp', 'enqueued', None, None)
def update_dhcp(self):
    if not self.provide_dhcp:
        return

    if util.is_network_node():
        self.ensure_mesh()
        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self):
            with db.get_lock('network', None, self.uuid, ttl=120):
                d = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
                d.restart_dhcpd()
    else:
        db.enqueue('networknode', {
            'type': 'update_dhcp',
            'network_uuid': self.uuid
        })
        db.add_event('network', self.uuid,
                     'update dhcp', 'enqueued', None, None)
def post(self, netblock=None, provide_dhcp=None, provide_nat=None, name=None,
         namespace=None):
    try:
        ipaddress.ip_network(netblock)
    except ValueError as e:
        return error(400, 'cannot parse netblock: %s' % e)

    if not namespace:
        namespace = get_jwt_identity()

    # If accessing a foreign namespace, we need to be an admin
    if get_jwt_identity() not in [namespace, 'system']:
        return error(
            401, 'only admins can create resources in a different namespace')

    network = db.allocate_network(netblock, provide_dhcp, provide_nat,
                                  name, namespace)
    db.add_event('network', network['uuid'], 'api', 'create', None, None)

    # Networks should immediately appear on the network node
    db.enqueue('networknode', {
        'type': 'deploy',
        'network_uuid': network['uuid']
    })
    db.add_event('network', network['uuid'], 'deploy', 'enqueued', None, None)
    db.add_event('network', network['uuid'], 'api', 'created', None, None)
    db.update_network_state(network['uuid'], 'created')

    # Initialise metadata
    db.persist_metadata('network', network['uuid'], {})

    return network
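# The netblock validation above leans entirely on the standard library:
# ipaddress.ip_network() raises ValueError for anything it cannot parse,
# including an otherwise valid address with host bits set. A quick
# illustration (standalone, not part of the original code):
import ipaddress

ipaddress.ip_network('192.168.20.0/24')   # ok: IPv4Network('192.168.20.0/24')
ipaddress.ip_network('2001:db8::/64')     # ok: IPv6 netblocks parse too

try:
    ipaddress.ip_network('192.168.20.1/24')   # rejected: host bits set
except ValueError as e:
    print('cannot parse netblock: %s' % e)

try:
    ipaddress.ip_network('not-a-netblock')
except ValueError as e:
    print('cannot parse netblock: %s' % e)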
def create(self):
    subst = self.subst_dict()

    with db.get_object_lock(self, ttl=120, op='Network create'):
        # Ensure network was not deleted whilst waiting for the lock.
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)

        if not util.check_for_interface(subst['vx_interface']):
            with util.RecordedOperation('create vxlan interface', self):
                util.create_interface(
                    subst['vx_interface'], 'vxlan',
                    'id %(vx_id)s dev %(physical_interface)s dstport 0'
                    % subst)
                util.execute(None,
                             'sysctl -w net.ipv4.conf.'
                             '%(vx_interface)s.arp_notify=1' % subst)

        if not util.check_for_interface(subst['vx_bridge']):
            with util.RecordedOperation('create vxlan bridge', self):
                util.create_interface(subst['vx_bridge'], 'bridge', '')
                util.execute(None,
                             'ip link set %(vx_interface)s '
                             'master %(vx_bridge)s' % subst)
                util.execute(None, 'ip link set %(vx_interface)s up' % subst)
                util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                util.execute(None,
                             'sysctl -w net.ipv4.conf.'
                             '%(vx_bridge)s.arp_notify=1' % subst)
                util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                util.execute(None, 'brctl setageing %(vx_bridge)s 0' % subst)

    if util.is_network_node():
        if not os.path.exists('/var/run/netns/%(netns)s' % subst):
            with util.RecordedOperation('create netns', self):
                util.execute(None, 'ip netns add %(netns)s' % subst)

        if not util.check_for_interface(subst['vx_veth_outer']):
            with util.RecordedOperation('create router veth', self):
                util.create_interface(
                    subst['vx_veth_outer'], 'veth',
                    'peer name %(vx_veth_inner)s' % subst)
                util.execute(
                    None,
                    'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)
                util.execute(
                    None,
                    'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                util.execute(None, 'ip link set %(vx_veth_outer)s up' % subst)
                util.execute(
                    None,
                    '%(in_netns)s ip link set %(vx_veth_inner)s up' % subst)
                util.execute(
                    None,
                    '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                    'dev %(vx_veth_inner)s' % subst)

        if not util.check_for_interface(subst['physical_veth_outer']):
            with util.RecordedOperation('create physical veth', self):
                util.create_interface(
                    subst['physical_veth_outer'], 'veth',
                    'peer name %(physical_veth_inner)s' % subst)
                util.execute(
                    None,
                    'brctl addif %(physical_bridge)s '
                    '%(physical_veth_outer)s' % subst)
                util.execute(
                    None, 'ip link set %(physical_veth_outer)s up' % subst)
                util.execute(
                    None,
                    'ip link set %(physical_veth_inner)s '
                    'netns %(netns)s' % subst)

        self.deploy_nat()
        self.update_dhcp()

    else:
        db.enqueue('networknode', DeployNetworkTask(self.db_entry['uuid']))
        db.add_event('network', self.db_entry['uuid'],
                     'deploy', 'enqueued', None, None)
def handle(jobname, workitem):
    log = LOG.withField('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('queues'), jobname))

    instance_uuid = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not QueueTask.__subclasscheck__(type(task)):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if (InstanceTask.__subclasscheck__(type(task)) or
                    isinstance(task, FetchImageTask)):
                instance_uuid = task.instance_uuid()

            if instance_uuid:
                log_i = log.withInstance(instance_uuid)
            else:
                log_i = log

            log_i.withField('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here
            # eventually? Then this can be generalised to record events on
            # networks/instances.

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.
            if instance_uuid:
                # TODO(andy) move to QueueTask
                db.add_event('instance', instance_uuid,
                             task.pretty_task_name(),
                             'dequeued', None, 'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), instance_uuid)

            elif isinstance(task, PreflightInstanceTask):
                redirect_to = instance_preflight(instance_uuid,
                                                 task.network())
                if redirect_to:
                    log_i.info('Redirecting instance start to %s'
                               % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                instance_start(instance_uuid, task.network())
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'deleted')
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            elif isinstance(task, ErrorInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'error')
                    if task.error_msg():
                        db.update_instance_error_message(
                            instance_uuid, task.error_msg())
                    db.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            else:
                log_i.withField('task', task).error('Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by an external issue and not an application error
        log.info('Fetch Image Error: %s', e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    except Exception as e:
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    finally:
        db.resolve(config.NODE_NAME, jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        log.info('Completed workitem')
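# The guard at the top of the loop above insists that each entry has already
# been decoded from its on-queue representation into a QueueTask subclass
# (QueueTask.__subclasscheck__(type(task)) is effectively
# issubclass(type(task), QueueTask)). The decoding itself happens elsewhere;
# the following is only a hypothetical sketch of what such a decoder might
# look like, assuming each task class accepts its serialized fields as
# keyword arguments. The real decoder may differ.
TASK_CLASSES = {
    'image_fetch': FetchImageTask,
    'instance_preflight': PreflightInstanceTask,
    'instance_start': StartInstanceTask,
    'instance_delete': DeleteInstanceTask,
}


def decode_task(raw):
    cls = TASK_CLASSES.get(raw.get('type'))
    if not cls:
        raise exceptions.UnknownTaskException('Task was not decoded: %s' % raw)
    fields = {k: v for k, v in raw.items() if k != 'type'}
    return cls(**fields)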
def post(self, name=None, cpus=None, memory=None, network=None, disk=None,
         ssh_key=None, user_data=None, placed_on=None, namespace=None,
         instance_uuid=None, video=None):
    global SCHEDULER

    # Check that the instance name is safe for use as a DNS host name
    if name != re.sub(r'([^a-zA-Z0-9_\-])', '', name) or len(name) > 63:
        return error(400, 'instance name must be useable as a DNS host name')

    # Sanity check
    if not disk:
        return error(400, 'instance must specify at least one disk')
    for d in disk:
        if not isinstance(d, dict):
            return error(400,
                         'disk specification should contain JSON objects')

    if network:
        for n in network:
            if not isinstance(n, dict):
                return error(
                    400, 'network specification should contain JSON objects')
            if 'network_uuid' not in n:
                return error(
                    400, 'network specification is missing network_uuid')

    if not video:
        video = {'model': 'cirrus', 'memory': 16384}

    if not namespace:
        namespace = get_jwt_identity()

    # Only system can specify a uuid
    if instance_uuid and get_jwt_identity() != 'system':
        return error(401, 'only system can specify an instance uuid')

    # If accessing a foreign namespace, we need to be an admin
    if get_jwt_identity() not in [namespace, 'system']:
        return error(
            401, 'only admins can create resources in a different namespace')

    # The instance needs to exist in the DB before network interfaces are
    # created
    if not instance_uuid:
        instance_uuid = str(uuid.uuid4())
        db.add_event('instance', instance_uuid,
                     'uuid allocated', None, None, None)

    # Create instance object
    instance = virt.from_db(instance_uuid)
    if instance:
        if get_jwt_identity() not in [
                instance.db_entry['namespace'], 'system']:
            logutil.info([virt.ThinInstance(instance_uuid)],
                         'Instance not found, ownership test')
            return error(404, 'instance not found')

    if not instance:
        instance = virt.from_definition(
            uuid=instance_uuid,
            name=name,
            disks=disk,
            memory_mb=memory,
            vcpus=cpus,
            ssh_key=ssh_key,
            user_data=user_data,
            owner=namespace,
            video=video,
            requested_placement=placed_on
        )

    # Initialise metadata
    db.persist_metadata('instance', instance_uuid, {})

    # Allocate IP addresses
    order = 0
    if network:
        for netdesc in network:
            n = net.from_db(netdesc['network_uuid'])
            if not n:
                db.enqueue_instance_delete(
                    config.parsed.get('NODE_NAME'), instance_uuid, 'error',
                    'missing network %s during IP allocation phase'
                    % netdesc['network_uuid'])
                return error(
                    404, 'network %s not found' % netdesc['network_uuid'])

            with db.get_lock('ipmanager', None, netdesc['network_uuid'],
                             ttl=120):
                db.add_event('network', netdesc['network_uuid'],
                             'allocate address', None, None, instance_uuid)
                ipm = db.get_ipmanager(netdesc['network_uuid'])
                if 'address' not in netdesc or not netdesc['address']:
                    netdesc['address'] = ipm.get_random_free_address()
                else:
                    if not ipm.reserve(netdesc['address']):
                        db.enqueue_instance_delete(
                            config.parsed.get('NODE_NAME'), instance_uuid,
                            'error',
                            'failed to reserve an IP on network %s'
                            % netdesc['network_uuid'])
                        return error(
                            409, 'address %s in use' % netdesc['address'])
                db.persist_ipmanager(netdesc['network_uuid'], ipm.save())

            if 'model' not in netdesc or not netdesc['model']:
                netdesc['model'] = 'virtio'

            db.create_network_interface(
                str(uuid.uuid4()), netdesc, instance_uuid, order)

    if not SCHEDULER:
        SCHEDULER = scheduler.Scheduler()

    try:
        # Have we been placed?
        if not placed_on:
            candidates = SCHEDULER.place_instance(instance, network)
            placement = candidates[0]
        else:
            SCHEDULER.place_instance(
                instance, network, candidates=[placed_on])
            placement = placed_on

    except exceptions.LowResourceException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'failed', None,
                     'insufficient resources: ' + str(e))
        db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                   instance_uuid, 'error',
                                   'scheduling failed')
        return error(507, str(e))

    except exceptions.CandidateNodeNotFoundException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'failed', None,
                     'candidate node not found: ' + str(e))
        db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                   instance_uuid, 'error',
                                   'scheduling failed')
        return error(404, 'node not found: %s' % e)

    # Record placement
    db.place_instance(instance_uuid, placement)
    db.add_event('instance', instance_uuid,
                 'placement', None, None, placement)

    # Create a queue entry for the instance start
    tasks = [{
        'type': 'instance_preflight',
        'instance_uuid': instance_uuid,
        'network': network
    }]
    for disk in instance.db_entry['block_devices']['devices']:
        if 'base' in disk and disk['base']:
            tasks.append({
                'type': 'image_fetch',
                'instance_uuid': instance_uuid,
                'url': disk['base']
            })
    tasks.append({
        'type': 'instance_start',
        'instance_uuid': instance_uuid,
        'network': network
    })

    # Enqueue creation tasks on desired node task queue
    db.enqueue(placement, {'tasks': tasks})
    db.add_event('instance', instance_uuid,
                 'create', 'enqueued', None, None)

    # Watch for a while and return results if things are fast, give up
    # after a while and just return the current state
    start_time = time.time()
    while time.time() - start_time < config.parsed.get('API_ASYNC_WAIT'):
        i = db.get_instance(instance_uuid)
        if i['state'] in ['created', 'deleted', 'error']:
            return i
        time.sleep(0.5)

    return i
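# Illustrative only, not part of the original code: for a single-disk
# instance booting from a base image on one network, the queue entry built
# above ends up on the placement node with three tasks, in order. The uuids
# and URL are placeholders, and the network entries would also carry the
# address and model filled in during IP allocation.
example_creation_work_item = {
    'tasks': [
        {'type': 'instance_preflight',
         'instance_uuid': '33333333-3333-3333-3333-333333333333',
         'network': [{'network_uuid':
                      '44444444-4444-4444-4444-444444444444'}]},
        {'type': 'image_fetch',
         'instance_uuid': '33333333-3333-3333-3333-333333333333',
         'url': 'https://example.com/images/example.qcow2'},
        {'type': 'instance_start',
         'instance_uuid': '33333333-3333-3333-3333-333333333333',
         'network': [{'network_uuid':
                      '44444444-4444-4444-4444-444444444444'}]},
    ],
}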
def create(self):
    subst = self.subst_dict()

    with db.get_lock('network', None, self.uuid, ttl=120):
        if not util.check_for_interface(subst['vx_interface']):
            with util.RecordedOperation('create vxlan interface', self):
                util.execute(
                    None,
                    'ip link add %(vx_interface)s type vxlan id %(vx_id)s '
                    'dev %(physical_interface)s dstport 0' % subst)
                util.execute(
                    None,
                    'sysctl -w net.ipv4.conf.%(vx_interface)s.arp_notify=1'
                    % subst)

        if not util.check_for_interface(subst['vx_bridge']):
            with util.RecordedOperation('create vxlan bridge', self):
                util.execute(
                    None, 'ip link add %(vx_bridge)s type bridge' % subst)
                util.execute(
                    None,
                    'ip link set %(vx_interface)s master %(vx_bridge)s'
                    % subst)
                util.execute(None, 'ip link set %(vx_interface)s up' % subst)
                util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                util.execute(
                    None,
                    'sysctl -w net.ipv4.conf.%(vx_bridge)s.arp_notify=1'
                    % subst)
                util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                util.execute(None, 'brctl setageing %(vx_bridge)s 0' % subst)

    if util.is_network_node():
        if not os.path.exists('/var/run/netns/%(netns)s' % subst):
            with util.RecordedOperation('create netns', self):
                util.execute(None, 'ip netns add %(netns)s' % subst)

        if not util.check_for_interface(subst['vx_veth_outer']):
            with util.RecordedOperation('create router veth', self):
                util.execute(
                    None,
                    'ip link add %(vx_veth_outer)s type veth peer name '
                    '%(vx_veth_inner)s' % subst)
                util.execute(
                    None,
                    'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)
                util.execute(
                    None,
                    'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                util.execute(None, 'ip link set %(vx_veth_outer)s up' % subst)
                util.execute(
                    None,
                    '%(in_netns)s ip link set %(vx_veth_inner)s up' % subst)
                util.execute(
                    None,
                    '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                    'dev %(vx_veth_inner)s' % subst)

        if not util.check_for_interface(subst['physical_veth_outer']):
            with util.RecordedOperation('create physical veth', self):
                util.execute(
                    None,
                    'ip link add %(physical_veth_outer)s type veth peer name '
                    '%(physical_veth_inner)s' % subst)
                util.execute(
                    None,
                    'brctl addif %(physical_bridge)s %(physical_veth_outer)s'
                    % subst)
                util.execute(
                    None, 'ip link set %(physical_veth_outer)s up' % subst)
                util.execute(
                    None,
                    'ip link set %(physical_veth_inner)s netns %(netns)s'
                    % subst)

        self.deploy_nat()
        self.update_dhcp()

    else:
        db.enqueue('networknode', {
            'type': 'deploy',
            'network_uuid': self.uuid
        })
        db.add_event('network', self.uuid, 'deploy', 'enqueued', None, None)
def handle(jobname, workitem):
    j = JobName(jobname)
    logutil.info([j], 'Processing workitem')

    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('queues'), jobname))

    instance_uuid = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            ro = [j]
            instance_uuid = task.get('instance_uuid')
            if instance_uuid:
                i = virt.from_db(instance_uuid)
                ro.append(i)

            if task.get('type').startswith('instance_') and not instance_uuid:
                logutil.error(ro, 'Instance task lacks instance uuid')
                return

            if instance_uuid:
                db.add_event('instance', instance_uuid,
                             task.get('type').replace('_', ' '),
                             'dequeued', None, 'Work item %s' % jobname)

            logutil.info(
                ro, 'Executing task %s: %s'
                % (task.get('type', 'unknown'), task))

            if task.get('type') == 'image_fetch':
                image_fetch(task.get('url'), instance_uuid)

            if task.get('type') == 'instance_preflight':
                redirect_to = instance_preflight(instance_uuid,
                                                 task.get('network'))
                if redirect_to:
                    util.log('info', ro,
                             'Redirecting instance start to %s' % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            if task.get('type') == 'instance_start':
                instance_start(instance_uuid, task.get('network'))
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'), {})

            if task.get('type') == 'instance_delete':
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(
                        instance_uuid, task.get('next_state', 'unknown'))
                    if task.get('next_state_message'):
                        db.update_instance_error_message(
                            instance_uuid, task.get('next_state_message'))
                    db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'),
                               {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

    except Exception as e:
        if instance_uuid:
            util.ignore_exception(daemon.process_name('queues'), e)
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'failed queue task: %s' % e)

    finally:
        db.resolve(config.parsed.get('NODE_NAME'), jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        logutil.info([j], 'Completed workitem')