Example #1
0
 def post(self, url=None):
     """Record a cache request for an image and queue its fetch.

     An 'image' event is logged against ``url`` first. A work item holding
     a single ``image_fetch`` task is then placed on the queue named by the
     NODE_NAME configuration value.
     """
     db.add_event('image', url, 'api', 'cache', None, None)

     local_queue = config.parsed.get('NODE_NAME')
     fetch_task = {'type': 'image_fetch', 'url': url}
     db.enqueue(local_queue, {'tasks': [fetch_task]})
Example #2
0
    def delete(self, confirm=False, namespace=None):
        """Delete all instances in the namespace.

        Args:
            confirm: Must be exactly ``True`` or the request is rejected.
            namespace: Namespace whose instances are deleted. Callers
                authenticated as 'system' must pass a string; other callers
                may only name their own namespace (or omit it).

        Returns:
            The list of instance uuids for which a delete task was queued,
            or the result of ``error(...)`` when the request is rejected.
        """
        if confirm is not True:
            return error(400, 'parameter confirm is not set true')

        caller = get_jwt_identity()
        if caller == 'system':
            if not isinstance(namespace, str):
                # A client using a system key must specify the namespace. This
                # ensures that deleting all instances in the cluster (by
                # specifying namespace='system') is a deliberate act.
                return error(400,
                             'system user must specify parameter namespace')
        else:
            if namespace and namespace != caller:
                return error(401, 'you cannot delete other namespaces')
            namespace = caller

        terminal_states = ('deleted', 'error')
        instances_del = []
        tasks_by_node = {}

        # Pass an explicit boolean: the bare name ``all`` is the builtin
        # function here, which was only working because it is truthy.
        for instance in list(db.get_instances(all=True, namespace=namespace)):
            if instance['state'] in terminal_states:
                continue

            # If this instance is not on a node, just do the DB cleanup locally
            node = instance['node'] or config.parsed.get('NODE_NAME')

            tasks_by_node.setdefault(node, []).append({
                'type': 'instance_delete',
                'instance_uuid': instance['uuid'],
                'next_state': 'deleted',
                'next_state_message': None
            })
            instances_del.append(instance['uuid'])

        for node, tasks in tasks_by_node.items():
            db.enqueue(node, {'tasks': tasks})

        # Wait (bounded by API_ASYNC_WAIT seconds) for the queued deletes to
        # reach a terminal state, pausing between polls so the database is
        # not hammered by a busy loop.
        waiting_for = list(instances_del)
        start_time = time.time()
        while (waiting_for and
               time.time() - start_time < config.parsed.get('API_ASYNC_WAIT')):
            waiting_for = [
                instance_uuid for instance_uuid in waiting_for
                if db.get_instance(instance_uuid)['state']
                not in terminal_states
            ]
            if waiting_for:
                time.sleep(0.5)

        return instances_del
Example #3
0
 def remove_dhcp(self):
     """Remove DHCP for this network, locally or via the network node.

     When not running on the network node, a RemoveDHCPNetworkTask is
     queued on 'networknode' and an 'enqueued' event is recorded. On the
     network node the removal is done under the object lock.
     """
     if not util.is_network_node():
         db.enqueue('networknode',
                    RemoveDHCPNetworkTask(self.db_entry['uuid']))
         db.add_event('network', self.db_entry['uuid'], 'remove dhcp',
                      'enqueued', None, None)
         return

     subst = self.subst_dict()
     with util.RecordedOperation('remove dhcp', self), \
             db.get_object_lock(self, ttl=120, op='Network remove DHCP'):
         dhcp.DHCP(self, subst['vx_veth_inner']).remove_dhcpd()
Example #4
0
 def remove_dhcp(self):
     """Remove DHCP for this network, locally or via the network node.

     Off the network node, a 'remove_dhcp' work item is queued on
     'networknode' and an 'enqueued' event is recorded. On the network
     node the removal runs while holding the network lock.
     """
     on_network_node = util.is_network_node()
     if not on_network_node:
         request = {'type': 'remove_dhcp', 'network_uuid': self.uuid}
         db.enqueue('networknode', request)
         db.add_event('network', self.uuid, 'remove dhcp', 'enqueued', None,
                      None)
         return

     subst = self.subst_dict()
     with util.RecordedOperation('remove dhcp', self):
         with db.get_lock('network', None, self.uuid, ttl=120):
             server = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
             server.remove_dhcpd()
Example #5
0
    def update_dhcp(self):
        """Restart DHCP for this network if it provides DHCP.

        Does nothing when the network's 'provide_dhcp' entry is falsy. Off
        the network node, an UpdateDHCPNetworkTask is queued on
        'networknode' and an 'enqueued' event is recorded. On the network
        node the restart is performed under the object lock.
        """
        if not self.db_entry['provide_dhcp']:
            return

        if not util.is_network_node():
            db.enqueue('networknode',
                       UpdateDHCPNetworkTask(self.db_entry['uuid']))
            db.add_event('network', self.db_entry['uuid'], 'update dhcp',
                         'enqueued', None, None)
            return

        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self), \
                db.get_object_lock(self, ttl=120, op='Network update DHCP'):
            dhcp.DHCP(self, subst['vx_veth_inner']).restart_dhcpd()
Example #6
0
    def update_dhcp(self):
        """Restart DHCP for this network if it provides DHCP.

        Does nothing when ``self.provide_dhcp`` is falsy. Off the network
        node, an 'update_dhcp' work item is queued on 'networknode' and an
        'enqueued' event is recorded. On the network node the mesh is
        ensured first, then the restart runs under the network lock.
        """
        if not self.provide_dhcp:
            return

        if not util.is_network_node():
            request = {'type': 'update_dhcp', 'network_uuid': self.uuid}
            db.enqueue('networknode', request)
            db.add_event('network', self.uuid, 'update dhcp', 'enqueued', None,
                         None)
            return

        self.ensure_mesh()
        subst = self.subst_dict()
        with util.RecordedOperation('update dhcp', self):
            with db.get_lock('network', None, self.uuid, ttl=120):
                server = dhcp.DHCP(self.uuid, subst['vx_veth_inner'])
                server.restart_dhcpd()
Example #7
0
    def post(self,
             netblock=None,
             provide_dhcp=None,
             provide_nat=None,
             name=None,
             namespace=None):
        """Create a network and queue its deployment on the network node.

        Returns the allocated network record, or the result of
        ``error(...)`` when the netblock cannot be parsed (400) or the
        caller targets a namespace it does not own (401).
        """
        try:
            ipaddress.ip_network(netblock)
        except ValueError as e:
            return error(400, 'cannot parse netblock: %s' % e)

        caller = get_jwt_identity()
        namespace = namespace or caller

        # If accessing a foreign name namespace, we need to be an admin
        if caller != namespace and caller != 'system':
            return error(
                401,
                'only admins can create resources in a different namespace')

        network = db.allocate_network(netblock, provide_dhcp, provide_nat,
                                      name, namespace)
        network_uuid = network['uuid']
        db.add_event('network', network_uuid, 'api', 'create', None, None)

        # Networks should immediately appear on the network node
        deploy_request = {'type': 'deploy', 'network_uuid': network_uuid}
        db.enqueue('networknode', deploy_request)
        db.add_event('network', network_uuid, 'deploy', 'enqueued', None,
                     None)

        db.add_event('network', network_uuid, 'api', 'created', None, None)
        db.update_network_state(network_uuid, 'created')

        # Initialise metadata
        db.persist_metadata('network', network_uuid, {})

        return network
Example #8
0
    def create(self):
        """Create the local plumbing for this network.

        Under the object lock, the vxlan interface and its bridge are
        created if they do not already exist. On the network node the
        network namespace, router veth pair and physical veth pair are then
        created if missing, followed by deploy_nat() and update_dhcp(). On
        any other node a DeployNetworkTask is queued on 'networknode'
        instead and an 'enqueued' event is recorded.

        Raises:
            DeadNetwork: if is_dead() reports true once the lock is held.
        """
        # All command templates below are filled in from this mapping.
        subst = self.subst_dict()

        with db.get_object_lock(self, ttl=120, op='Network create'):
            # Ensure network was not deleted whilst waiting for the lock.
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            # Each step is skipped when its interface is already present.
            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    util.create_interface(
                        subst['vx_interface'], 'vxlan',
                        'id %(vx_id)s dev %(physical_interface)s dstport 0' %
                        subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_interface)s.arp_notify=1' % subst)

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    util.create_interface(subst['vx_bridge'], 'bridge', '')
                    # Attach the vxlan interface to the bridge, then bring
                    # both links up.
                    util.execute(
                        None, 'ip link set %(vx_interface)s '
                        'master %(vx_bridge)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_interface)s up' % subst)
                    util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_bridge)s.arp_notify=1' % subst)
                    # Bridge tuning: forward delay 0, STP off, ageing 0.
                    util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                    util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                    util.execute(None,
                                 'brctl setageing %(vx_bridge)s 0' % subst)

        # NOTE(review): everything from here on runs after the object lock
        # has been released.
        if util.is_network_node():
            if not os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('create netns', self):
                    util.execute(None, 'ip netns add %(netns)s' % subst)

            if not util.check_for_interface(subst['vx_veth_outer']):
                with util.RecordedOperation('create router veth', self):
                    # The outer end joins the vxlan bridge; the inner end is
                    # moved into the namespace and given the router address.
                    util.create_interface(
                        subst['vx_veth_outer'], 'veth',
                        'peer name %(vx_veth_inner)s' % subst)
                    util.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' %
                        subst)
                    util.execute(
                        None,
                        'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_veth_outer)s up' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set %(vx_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                        'dev %(vx_veth_inner)s' % subst)

            if not util.check_for_interface(subst['physical_veth_outer']):
                with util.RecordedOperation('create physical veth', self):
                    # The outer end joins the physical bridge; the inner end
                    # is moved into the namespace.
                    util.create_interface(
                        subst['physical_veth_outer'], 'veth',
                        'peer name %(physical_veth_inner)s' % subst)
                    util.execute(
                        None, 'brctl addif %(physical_bridge)s '
                        '%(physical_veth_outer)s' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_outer)s up' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_inner)s '
                        'netns %(netns)s' % subst)

            self.deploy_nat()
            self.update_dhcp()
        else:
            # Not the network node: ask it to deploy this network as well.
            db.enqueue('networknode', DeployNetworkTask(self.db_entry['uuid']))
            db.add_event('network', self.db_entry['uuid'], 'deploy',
                         'enqueued', None, None)
Example #9
0
def handle(jobname, workitem):
    """Run every task in a dequeued work item, in order.

    Tasks must be decoded QueueTask objects. Instance tasks and image
    fetches update the instance uuid that later events and error reports
    are attributed to. A preflight that returns a redirect re-queues the
    whole work item on the other node and stops processing here. Whatever
    happens, the job is resolved on this node's queue at the end.
    """
    log = LOG.withField('workitem', jobname)
    log.info('Processing workitem')

    proc_title = '%s-%s' % (daemon.process_name('queues'), jobname)
    setproctitle.setproctitle(proc_title)

    instance_uuid = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not QueueTask.__subclasscheck__(type(task)):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            carries_instance = (InstanceTask.__subclasscheck__(type(task))
                                or isinstance(task, FetchImageTask))
            if carries_instance:
                instance_uuid = task.instance_uuid()

            log_i = log.withInstance(instance_uuid) if instance_uuid else log
            log_i.withField('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if instance_uuid:
                # TODO(andy) move to QueueTask
                db.add_event('instance', instance_uuid,
                             task.pretty_task_name(), 'dequeued', None,
                             'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), instance_uuid)

            elif isinstance(task, PreflightInstanceTask):
                redirect_to = instance_preflight(instance_uuid, task.network())
                if redirect_to:
                    # Another node should start this instance: hand the whole
                    # work item over and stop here.
                    log_i.info('Redirecting instance start to %s' %
                               redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                instance_start(instance_uuid, task.network())
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'deleted')
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            elif isinstance(task, ErrorInstanceTask):
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid, 'error')

                    error_msg = task.error_msg()
                    if error_msg:
                        db.update_instance_error_message(
                            instance_uuid, task.error_msg())
                    db.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

            else:
                log_i.withField('task', task).error('Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by external issue and not an application error
        log.info('Fetch Image Error: %s', e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    except Exception as e:
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'failed queue task: %s' % e)

    finally:
        # Runs on success, failure and the redirect early-return alike.
        db.resolve(config.NODE_NAME, jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        log.info('Completed workitem')
Example #10
0
    def post(self,
             name=None,
             cpus=None,
             memory=None,
             network=None,
             disk=None,
             ssh_key=None,
             user_data=None,
             placed_on=None,
             namespace=None,
             instance_uuid=None,
             video=None):
        """Create an instance, allocate its addresses and queue its start.

        The request is validated, the instance record is loaded or created,
        one network interface is created per entry in ``network``, the
        instance is scheduled onto a node, and preflight / image fetch /
        start tasks are queued on that node. The call then polls for up to
        API_ASYNC_WAIT seconds and returns the instance record.

        Returns:
            The instance record from the database, or the result of
            ``error(...)`` when validation, address reservation or
            scheduling fails.
        """
        global SCHEDULER

        # Check that the instance name is safe for use as a DNS host name.
        # A missing (non-string) name is rejected here rather than crashing
        # inside re.sub().
        if (not isinstance(name, str)
                or name != re.sub(r'([^a-zA-Z0-9_\-])', '', name)
                or len(name) > 63):
            return error(400,
                         'instance name must be useable as a DNS host name')

        # Sanity check
        if not disk:
            return error(400, 'instance must specify at least one disk')
        for d in disk:
            if not isinstance(d, dict):
                return error(400,
                             'disk specification should contain JSON objects')

        if network:
            for n in network:
                if not isinstance(n, dict):
                    return error(
                        400,
                        'network specification should contain JSON objects')

                if 'network_uuid' not in n:
                    return error(
                        400, 'network specification is missing network_uuid')

        if not video:
            video = {'model': 'cirrus', 'memory': 16384}

        if not namespace:
            namespace = get_jwt_identity()

        # Only system can specify a uuid
        if instance_uuid and get_jwt_identity() != 'system':
            return error(401, 'only system can specify an instance uuid')

        # If accessing a foreign namespace, we need to be an admin
        if get_jwt_identity() not in [namespace, 'system']:
            return error(
                401,
                'only admins can create resources in a different namespace')

        # The instance needs to exist in the DB before network interfaces are created
        if not instance_uuid:
            instance_uuid = str(uuid.uuid4())
            db.add_event('instance', instance_uuid, 'uuid allocated', None,
                         None, None)

        # Create instance object
        instance = virt.from_db(instance_uuid)
        if instance:
            if get_jwt_identity() not in [
                    instance.db_entry['namespace'], 'system'
            ]:
                logutil.info([virt.ThinInstance(instance_uuid)],
                             'Instance not found, ownership test')
                return error(404, 'instance not found')

        if not instance:
            instance = virt.from_definition(uuid=instance_uuid,
                                            name=name,
                                            disks=disk,
                                            memory_mb=memory,
                                            vcpus=cpus,
                                            ssh_key=ssh_key,
                                            user_data=user_data,
                                            owner=namespace,
                                            video=video,
                                            requested_placement=placed_on)

        # Initialise metadata
        db.persist_metadata('instance', instance_uuid, {})

        # Allocate IP addresses
        order = 0
        if network:
            for netdesc in network:
                n = net.from_db(netdesc['network_uuid'])
                if not n:
                    db.enqueue_instance_delete(
                        config.parsed.get('NODE_NAME'), instance_uuid, 'error',
                        'missing network %s during IP allocation phase' %
                        netdesc['network_uuid'])
                    return error(
                        404, 'network %s not found' % netdesc['network_uuid'])

                with db.get_lock('ipmanager',
                                 None,
                                 netdesc['network_uuid'],
                                 ttl=120):
                    db.add_event('network', netdesc['network_uuid'],
                                 'allocate address', None, None, instance_uuid)
                    ipm = db.get_ipmanager(netdesc['network_uuid'])
                    if 'address' not in netdesc or not netdesc['address']:
                        netdesc['address'] = ipm.get_random_free_address()
                    else:
                        if not ipm.reserve(netdesc['address']):
                            db.enqueue_instance_delete(
                                config.parsed.get('NODE_NAME'), instance_uuid,
                                'error',
                                'failed to reserve an IP on network %s' %
                                netdesc['network_uuid'])
                            return error(
                                409, 'address %s in use' % netdesc['address'])

                    db.persist_ipmanager(netdesc['network_uuid'], ipm.save())

                if 'model' not in netdesc or not netdesc['model']:
                    netdesc['model'] = 'virtio'

                db.create_network_interface(str(uuid.uuid4()), netdesc,
                                            instance_uuid, order)
                # Number interfaces in request order so each one gets a
                # distinct position instead of all sharing order 0.
                order += 1

        if not SCHEDULER:
            SCHEDULER = scheduler.Scheduler()

        try:
            # Have we been placed?
            if not placed_on:
                candidates = SCHEDULER.place_instance(instance, network)
                placement = candidates[0]

            else:
                SCHEDULER.place_instance(instance,
                                         network,
                                         candidates=[placed_on])
                placement = placed_on

        except exceptions.LowResourceException as e:
            db.add_event('instance', instance_uuid, 'schedule', 'failed', None,
                         'insufficient resources: ' + str(e))
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'scheduling failed')
            return error(507, str(e))

        except exceptions.CandidateNodeNotFoundException as e:
            db.add_event('instance', instance_uuid, 'schedule', 'failed', None,
                         'candidate node not found: ' + str(e))
            # Use the same config accessor as every other lookup in this
            # method; 'config.get.parsed' raised AttributeError here.
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'scheduling failed')
            return error(404, 'node not found: %s' % e)

        # Record placement
        db.place_instance(instance_uuid, placement)
        db.add_event('instance', instance_uuid, 'placement', None, None,
                     placement)

        # Create a queue entry for the instance start
        tasks = [{
            'type': 'instance_preflight',
            'instance_uuid': instance_uuid,
            'network': network
        }]
        # One image fetch per block device that names a base image. The loop
        # variable deliberately does not reuse the 'disk' parameter name.
        for device in instance.db_entry['block_devices']['devices']:
            if 'base' in device and device['base']:
                tasks.append({
                    'type': 'image_fetch',
                    'instance_uuid': instance_uuid,
                    'url': device['base']
                })
        tasks.append({
            'type': 'instance_start',
            'instance_uuid': instance_uuid,
            'network': network
        })

        # Enqueue creation tasks on desired node task queue
        db.enqueue(placement, {'tasks': tasks})
        db.add_event('instance', instance_uuid, 'create', 'enqueued', None,
                     None)

        # Watch for a while and return results if things are fast, give up
        # after a while and just return the current state
        start_time = time.time()
        while time.time() - start_time < config.parsed.get('API_ASYNC_WAIT'):
            i = db.get_instance(instance_uuid)
            if i['state'] in ['created', 'deleted', 'error']:
                return i
            time.sleep(0.5)

        # Read the record afresh: this is the current state, and it is also
        # defined when the wait loop above never ran.
        return db.get_instance(instance_uuid)
Example #11
0
    def create(self):
        """Create the local plumbing for this network.

        While holding the network lock, the vxlan interface and bridge are
        created if missing. On the network node the namespace, router veth
        and physical veth are then created if missing, followed by
        deploy_nat() and update_dhcp(). On other nodes a 'deploy' work item
        is queued on 'networknode' and an 'enqueued' event is recorded.
        """
        subst = self.subst_dict()

        def run_all(*templates):
            # Fill in and execute the command templates strictly in order.
            for template in templates:
                util.execute(None, template % subst)

        with db.get_lock('network', None, self.uuid, ttl=120):
            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    run_all(
                        'ip link add %(vx_interface)s type vxlan id %(vx_id)s '
                        'dev %(physical_interface)s dstport 0',
                        'sysctl -w net.ipv4.conf.%(vx_interface)s.arp_notify=1',
                    )

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    run_all(
                        'ip link add %(vx_bridge)s type bridge',
                        'ip link set %(vx_interface)s master %(vx_bridge)s',
                        'ip link set %(vx_interface)s up',
                        'ip link set %(vx_bridge)s up',
                        'sysctl -w net.ipv4.conf.%(vx_bridge)s.arp_notify=1',
                        'brctl setfd %(vx_bridge)s 0',
                        'brctl stp %(vx_bridge)s off',
                        'brctl setageing %(vx_bridge)s 0',
                    )

        if not util.is_network_node():
            # Not the network node: ask it to deploy this network as well.
            deploy_request = {'type': 'deploy', 'network_uuid': self.uuid}
            db.enqueue('networknode', deploy_request)
            db.add_event('network', self.uuid, 'deploy', 'enqueued', None,
                         None)
            return

        if not os.path.exists('/var/run/netns/%(netns)s' % subst):
            with util.RecordedOperation('create netns', self):
                run_all('ip netns add %(netns)s')

        if not util.check_for_interface(subst['vx_veth_outer']):
            with util.RecordedOperation('create router veth', self):
                run_all(
                    'ip link add %(vx_veth_outer)s type veth peer name '
                    '%(vx_veth_inner)s',
                    'ip link set %(vx_veth_inner)s netns %(netns)s',
                    'brctl addif %(vx_bridge)s %(vx_veth_outer)s',
                    'ip link set %(vx_veth_outer)s up',
                    '%(in_netns)s ip link set %(vx_veth_inner)s up',
                    '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                    'dev %(vx_veth_inner)s',
                )

        if not util.check_for_interface(subst['physical_veth_outer']):
            with util.RecordedOperation('create physical veth', self):
                run_all(
                    'ip link add %(physical_veth_outer)s type veth peer name '
                    '%(physical_veth_inner)s',
                    'brctl addif %(physical_bridge)s %(physical_veth_outer)s',
                    'ip link set %(physical_veth_outer)s up',
                    'ip link set %(physical_veth_inner)s netns %(netns)s',
                )

        self.deploy_nat()
        self.update_dhcp()
Example #12
0
def handle(jobname, workitem):
    """Run every task in a dequeued work item, in order.

    Each task is a dict with a 'type' and, for instance tasks, an
    'instance_uuid'. Supported types are 'image_fetch',
    'instance_preflight', 'instance_start' and 'instance_delete'. A
    preflight that returns a redirect re-queues the whole work item on the
    other node and stops processing here. Any failure is handed to
    util.ignore_exception(); when an instance uuid is known the instance is
    additionally queued for deletion in the 'error' state. The job is always
    resolved on this node's queue at the end.
    """
    j = JobName(jobname)
    logutil.info([j], 'Processing workitem')
    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    instance_uuid = None
    try:
        for task in workitem.get('tasks', []):
            # 'ro' is the list of related objects passed to the log calls.
            ro = [j]
            instance_uuid = task.get('instance_uuid')
            if instance_uuid:
                i = virt.from_db(instance_uuid)
                ro.append(i)

            task_type = task.get('type')
            if task_type.startswith('instance_') and not instance_uuid:
                logutil.error(ro, 'Instance task lacks instance uuid')
                return

            if instance_uuid:
                db.add_event('instance', instance_uuid,
                             task_type.replace('_', ' '), 'dequeued',
                             None, 'Work item %s' % jobname)

            logutil.info(ro, 'Executing task %s: %s' % (task_type, task))

            if task_type == 'image_fetch':
                image_fetch(task.get('url'), instance_uuid)

            elif task_type == 'instance_preflight':
                redirect_to = instance_preflight(instance_uuid,
                                                 task.get('network'))
                if redirect_to:
                    # NOTE(review): this is the only call here that uses
                    # util.log rather than logutil - confirm it is intended.
                    util.log('info', ro,
                             'Redirecting instance start to %s' % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif task_type == 'instance_start':
                instance_start(instance_uuid, task.get('network'))
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'), {})

            elif task_type == 'instance_delete':
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid,
                                             task.get('next_state', 'unknown'))
                    if task.get('next_state_message'):
                        db.update_instance_error_message(
                            instance_uuid, task.get('next_state_message'))
                    db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'),
                               {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

    except Exception as e:
        # Hand every failure to ignore_exception, not only those tied to an
        # instance; otherwise a task with no instance uuid (or a malformed
        # task) fails without leaving any trace.
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'failed queue task: %s' % e)

    finally:
        # Runs on success, failure and the early returns above alike.
        db.resolve(config.parsed.get('NODE_NAME'), jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        logutil.info([j], 'Completed workitem')