Code Example #1
File: blob.py Project: mandoonandy/shakenfist
    def request_replication(self, allow_excess=0):
        with self.get_lock_attr('locations', 'Request replication'):
            locations = self.locations

            # Filter out absent locations
            for node_name in self.locations:
                n = Node.from_db(node_name)
                if n.state.value != Node.STATE_CREATED:
                    locations.remove(node_name)

            replica_count = len(locations)
            targets = config.BLOB_REPLICATION_FACTOR + allow_excess - replica_count
            self.log.info(
                'Desired replica count is %d, we have %d, excess of %d requested'
                %
                (config.BLOB_REPLICATION_FACTOR, replica_count, allow_excess))
            if targets > 0:
                blob_size_gb = int(int(self.size) / GiB)
                nodes = nodes_by_free_disk_descending(minimum=blob_size_gb +
                                                      config.MINIMUM_FREE_DISK,
                                                      intention='blobs')

                # Don't copy to locations which already have the blob
                for n in self.locations:
                    if n in nodes:
                        nodes.remove(n)

                self.log.with_field(
                    'nodes', nodes).debug('Considered for blob replication')

                for n in nodes[:targets]:
                    etcd.enqueue(n, {'tasks': [FetchBlobTask(self.uuid)]})
                    self.log.with_field('node',
                                        n).info('Instructed to replicate blob')
Code Example #2
File: net.py Project: mandoonandy/shakenfist
    def remove_dhcp(self):
        if config.NODE_IS_NETWORK_NODE:
            subst = self.subst_dict()
            with util_general.RecordedOperation('remove dhcp', self):
                with self.get_lock(op='Network remove DHCP'):
                    d = dhcp.DHCP(self, subst['vx_veth_inner'])
                    d.remove_dhcpd()
        else:
            etcd.enqueue('networknode', RemoveDHCPNetworkTask(self.uuid))
Code Example #3
    def post(self, interface_uuid=None):
        ni, n, err = api_util.safe_get_network_interface(interface_uuid)
        if err:
            return err

        err = api_util.assign_floating_ip(ni)
        if err:
            return err

        etcd.enqueue('networknode',
                     FloatNetworkInterfaceTask(n.uuid, interface_uuid))
Code Example #4
    def post(self, url=None):
        # The only artifact type you can force the cluster to fetch is an
        # image, so TYPE_IMAGE is assumed here. We ensure that the image exists
        # in the database in an initial state here so that it will show up in
        # image list requests. The image is fetched by the queued job later.
        a = Artifact.from_url(Artifact.TYPE_IMAGE, url)

        etcd.enqueue(config.NODE_NAME, {
            'tasks': [FetchImageTask(url)],
        })
        return a.external_view()
Code Example #5
File: net.py Project: mandoonandy/shakenfist
    def remove_nat(self):
        if config.NODE_IS_NETWORK_NODE:
            if self.floating_gateway:
                with db.get_lock('ipmanager', None, 'floating', ttl=120,
                                 op='Remove NAT'):
                    ipm = IPManager.from_db('floating')
                    ipm.release(self.floating_gateway)
                    ipm.persist()
                    self.update_floating_gateway(None)

        else:
            etcd.enqueue('networknode', RemoveNATNetworkTask(self.uuid))
Code Example #6
File: net.py Project: mandoonandy/shakenfist
    def update_dhcp(self):
        if not self.provide_dhcp:
            return

        if config.NODE_IS_NETWORK_NODE:
            subst = self.subst_dict()
            with util_general.RecordedOperation('update dhcp', self):
                with self.get_lock(op='Network update DHCP'):
                    d = dhcp.DHCP(self, subst['vx_veth_inner'])
                    d.restart_dhcpd()
        else:
            etcd.enqueue('networknode', UpdateDHCPNetworkTask(self.uuid))
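
Examples #2, #5 and #6 all follow the same dispatch pattern: do the work in-process when config.NODE_IS_NETWORK_NODE is set, otherwise enqueue the corresponding task onto the 'networknode' queue. Below is a minimal sketch of that pattern in isolation; FrobNetworkTask and _frob_locally are placeholder names used only for illustration, not part of the project:

    def frob(self):
        # Hypothetical method illustrating the dispatch pattern shared by
        # examples #2, #5 and #6. FrobNetworkTask and _frob_locally are
        # placeholder names, not real shakenfist symbols.
        if config.NODE_IS_NETWORK_NODE:
            # We are the network node, so do the work locally under a lock.
            with self.get_lock(op='Network frob'):
                self._frob_locally()
        else:
            # Otherwise hand the work to the network node via its etcd queue.
            etcd.enqueue('networknode', FrobNetworkTask(self.uuid))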
Code Example #7
    def delete(self):
        if self.floating['floating_address']:
            etcd.enqueue(
                'networknode',
                DefloatNetworkInterfaceTask(self.network_uuid, self.uuid))

        with db.get_lock('ipmanager', None, self.network_uuid,
                         ttl=120, op='Release fixed IP'):
            ipm = IPManager.from_db(self.network_uuid)
            ipm.release(self.ipv4)
            ipm.persist()

        self.state = dbo.STATE_DELETED
Code Example #8
    def post(self, interface_uuid=None):
        ni, n, err = api_util.safe_get_network_interface(interface_uuid)
        if err:
            return err

        float_net = net.Network.from_db('floating')
        if not float_net:
            return api_base.error(404, 'floating network not found')

        # The address is released as part of the queued job, so this handler
        # is "unbalanced" compared to the float handler above, which assigns
        # the address before enqueuing the task.
        etcd.enqueue('networknode',
                     DefloatNetworkInterfaceTask(n.uuid, interface_uuid))
Code Example #9
File: net.py Project: mandoonandy/shakenfist
    def delete_on_network_node(self):
        with self.get_lock(op='Network delete'):
            subst = self.subst_dict()

            if util_network.check_for_interface(subst['vx_veth_outer']):
                with util_general.RecordedOperation('delete router veth', self):
                    util_process.execute(
                        None, 'ip link delete %(vx_veth_outer)s' % subst)

            if util_network.check_for_interface(subst['egress_veth_outer']):
                with util_general.RecordedOperation('delete egress veth', self):
                    util_process.execute(
                        None,
                        'ip link delete %(egress_veth_outer)s' % subst)

            if os.path.exists('/var/run/netns/%s' % self.uuid):
                with util_general.RecordedOperation('delete netns', self):
                    util_process.execute(
                        None, 'ip netns del %s' % self.uuid)

            if self.floating_gateway:
                with db.get_lock('ipmanager', None, 'floating', ttl=120,
                                 op='Network delete'):
                    ipm = IPManager.from_db('floating')
                    ipm.release(self.floating_gateway)
                    ipm.persist()
                    self.update_floating_gateway(None)

            self.state = self.STATE_DELETED

        # Ensure that all hypervisors remove this network. This is really
        # just catching strays, apart from on the network node where this
        # cleanup is strictly required.
        for hyp in Nodes([active_nodes]):
            etcd.enqueue(hyp.uuid,
                         {'tasks': [
                             HypervisorDestroyNetworkTask(self.uuid)
                         ]})

        self.remove_dhcp()
        self.remove_nat()

        ipm = IPManager.from_db(self.uuid)
        ipm.delete()
Code Example #10
File: net.py Project: mandoonandy/shakenfist
    def new(cls, name, namespace, netblock, provide_dhcp=False,
            provide_nat=False, uuid=None, vxid=None):

        if not uuid:
            # uuid should only be specified in testing
            uuid = str(uuid4())

        if not vxid:
            vxid = Network.allocate_vxid(uuid)

        # Pre-create the IPManager
        IPManager.new(uuid, netblock)

        Network._db_create(
            uuid,
            {
                'vxid': vxid,
                'name': name,
                'namespace': namespace,
                'netblock': netblock,
                'provide_dhcp': provide_dhcp,
                'provide_nat': provide_nat,
                'version': cls.current_version
            }
        )

        n = Network.from_db(uuid)
        n.state = Network.STATE_INITIAL

        # Networks should immediately appear on the network node
        etcd.enqueue('networknode', DeployNetworkTask(uuid))

        # TODO(andy): Integrate metadata into each object type
        # Initialise metadata
        db.persist_metadata('network', uuid, {})

        return n
Code Example #11
    def enqueue_delete_remote(self, node):
        etcd.enqueue(node, {'tasks': [DeleteInstanceTask(self.uuid)]})
Code Example #12
File: db.py Project: mikalstill/shakenfist-personal
def enqueue(queuename, workitem):
    etcd.enqueue(queuename, workitem)
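
Across these examples the work item handed to enqueue() takes one of two shapes: a bare task object (examples #2, #3 and #8) or a dict carrying a 'tasks' list (examples #1, #4 and #11), which is the shape the queue worker in example #14 iterates over. A small sketch of calling the db.enqueue() wrapper above with both shapes follows; the import paths are assumptions inferred from these snippets, not verified against the project layout:

# Hypothetical caller of the db.enqueue() wrapper above. The import paths
# below are assumed from these snippets and may differ from the real
# project layout.
from shakenfist import db
from shakenfist.tasks import DeleteInstanceTask, RemoveDHCPNetworkTask

def queue_some_work(node_name, instance_uuid, network_uuid):
    # Dict payload with a 'tasks' list, as consumed by handle() in
    # example #14.
    db.enqueue(node_name, {'tasks': [DeleteInstanceTask(instance_uuid)]})

    # Bare task payload aimed at the network node queue, as in example #2.
    db.enqueue('networknode', RemoveDHCPNetworkTask(network_uuid))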
Code Example #13
File: snapshot.py Project: mandoonandy/shakenfist
    def post(self,
             instance_ref=None,
             instance_from_db=None,
             all=None,
             device=None,
             max_versions=0):
        disks = instance_from_db.block_devices['devices']
        if instance_from_db.uefi:
            disks.append({
                'type': 'nvram',
                'device': 'nvram',
                'path': os.path.join(instance_from_db.instance_path, 'nvram'),
                'snapshot_ignores': False
            })

        # Filter if requested
        if device:
            new_disks = []
            for d in disks:
                if d['device'] == device:
                    new_disks.append(d)
            disks = new_disks
        elif not all:
            disks = [disks[0]]
        LOG.with_fields({
            'instance': instance_from_db.uuid,
            'devices': disks
        }).info('Devices for snapshot')

        out = {}
        for disk in disks:
            if disk['snapshot_ignores']:
                continue

            if disk['type'] not in ['qcow2', 'nvram']:
                continue

            if not os.path.exists(disk['path']):
                continue

            a = Artifact.from_url(
                Artifact.TYPE_SNAPSHOT, '%s%s/%s' %
                (artifact.INSTANCE_URL, instance_from_db.uuid, disk['device']),
                max_versions)

            blob_uuid = str(uuid.uuid4())
            entry = a.add_index(blob_uuid)

            out[disk['device']] = {
                'source_url': a.source_url,
                'artifact_uuid': a.uuid,
                'artifact_index': entry['index'],
                'blob_uuid': blob_uuid
            }

            if disk['type'] == 'nvram':
                # These are small and don't use qemu-img to capture, so just
                # do them now.
                blob.ensure_blob_path()
                dest_path = blob.Blob.filepath(blob_uuid)
                shutil.copyfile(disk['path'], dest_path)

                st = os.stat(dest_path)
                b = blob.Blob.new(blob_uuid, st.st_size, time.time(),
                                  time.time())
                b.observe()
                a.state = Artifact.STATE_CREATED

            else:
                etcd.enqueue(
                    config.NODE_NAME, {
                        'tasks': [
                            SnapshotTask(instance_from_db.uuid, disk, a.uuid,
                                         blob_uuid)
                        ],
                    })
            instance_from_db.add_event(
                'api',
                'snapshot of %s requested' % disk['path'].split('/')[-1], None,
                a.uuid)

        return out
Code Example #14
def handle(jobname, workitem):
    libvirt = util_libvirt.get_libvirt()

    log = LOG.with_field('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    inst = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not QueueTask.__subclasscheck__(type(task)):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if InstanceTask.__subclasscheck__(type(task)):
                inst = instance.Instance.from_db(task.instance_uuid())
                if not inst:
                    raise exceptions.InstanceNotInDBException(
                        task.instance_uuid())

            if isinstance(task, FetchImageTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if isinstance(task, SnapshotTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if inst:
                log_i = log.with_instance(inst)
            else:
                log_i = log

            log_i.with_field('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if inst:
                # TODO(andy) move to QueueTask
                db.add_event('instance', inst.uuid, task.pretty_task_name(),
                             'dequeued', None, 'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), inst)

            elif isinstance(task, PreflightInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED
                        or inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot preflight an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                redirect_to = instance_preflight(inst, task.network())
                if redirect_to:
                    log_i.info('Redirecting instance start to %s' %
                               redirect_to)
                    etcd.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED
                        or inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot start an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                instance_start(inst, task.network())
                etcd.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                try:
                    instance_delete(inst)
                    etcd.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util_general.ignore_exception(
                        'instance %s delete task' % inst, e)

            elif isinstance(task, FloatNetworkInterfaceTask):
                # Just punt it to the network node now that the interface is ready
                etcd.enqueue('networknode', task)

            elif isinstance(task, SnapshotTask):
                snapshot(inst, task.disk(), task.artifact_uuid(),
                         task.blob_uuid())

            elif isinstance(task, DeleteNetworkWhenClean):
                # Check if any interfaces remain on network
                task_network = net.Network.from_db(task.network_uuid())
                ifaces = networkinterface.interfaces_for_network(task_network)
                cur_interfaces = {i.uuid: i for i in ifaces}

                if cur_interfaces:
                    LOG.with_network(task_network).error(
                        'During DeleteNetworkWhenClean new interfaces have '
                        'connected to network: %s', cur_interfaces)

                # Only check those present at delete task initiation time.
                remain_interfaces = list(
                    set(task.wait_interfaces()) & set(cur_interfaces))
                if remain_interfaces:
                    # Queue task on a node with a remaining instance
                    first_iface = cur_interfaces[remain_interfaces[0]]
                    inst = instance.Instance.from_db(first_iface.instance_uuid)
                    etcd.enqueue(inst.placement['node'], {
                        'tasks': [
                            DeleteNetworkWhenClean(task.network_uuid(),
                                                   remain_interfaces)
                        ]
                    },
                                 delay=60)

                else:
                    # All original instances deleted, safe to delete network
                    etcd.enqueue('networknode',
                                 DestroyNetworkTask(task.network_uuid()))

            elif isinstance(task, HypervisorDestroyNetworkTask):
                n = net.Network.from_db(task.network_uuid())
                n.delete_on_hypervisor()

            elif isinstance(task, FetchBlobTask):
                metrics = etcd.get('metrics', config.NODE_NAME, None)
                if metrics:
                    metrics = metrics.get('metrics', {})
                else:
                    metrics = {}

                b = blob.Blob.from_db(task.blob_uuid())
                if not b:
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, not found')

                elif (int(metrics.get('disk_free_blobs', 0)) - int(b.size) <
                      config.MINIMUM_FREE_DISK):
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, insufficient space')

                else:
                    log.with_object(b).info('Replicating blob')
                    size = b.ensure_local([])
                    log.with_object(b).with_fields({
                        'transferred': size,
                        'expected': b.size
                    }).info('Replicating blob complete')

            else:
                log_i.with_field('task',
                                 task).error('Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by an external issue rather than an application error
        log.info('Fetch Image Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image fetch failed: %s' % e)

    except exceptions.ImagesCannotShrinkException as e:
        log.info('Fetch Resize Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image resize failed: %s' % e)

    except libvirt.libvirtError as e:
        log.info('Libvirt Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except exceptions.InstanceException as e:
        log.info('Instance Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except Exception as e:
        # Logging ignored exception - this should be investigated
        util_general.ignore_exception('queue worker', e)
        if inst:
            inst.enqueue_delete_due_error('Failed queue task: %s' % e)

    finally:
        etcd.resolve(config.NODE_NAME, jobname)
        if inst:
            inst.add_event('tasks complete',
                           'dequeued',
                           msg='Work item %s' % jobname)
        log.info('Completed workitem')
Code Example #15
File: net.py Project: mandoonandy/shakenfist
    def _maintain_networks(self):
        LOG.info('Maintaining networks')

        # Discover what networks are present
        _, _, vxid_to_mac = util_network.discover_interfaces()

        # Determine what networks we should be on
        host_networks = []
        seen_vxids = []

        if not config.NODE_IS_NETWORK_NODE:
            # For normal nodes, just the ones we have instances for. We need
            # to use the more expensive interfaces_for_instance() method of
            # looking up instance interfaces here if the instance cache hasn't
            # been populated yet (i.e. the instance is still being created).
            for inst in instance.Instances([instance.this_node_filter,
                                            instance.active_states_filter]):
                ifaces = inst.interfaces
                if not ifaces:
                    ifaces = list(
                        networkinterface.interfaces_for_instance(inst))

                for iface_uuid in ifaces:
                    ni = networkinterface.NetworkInterface.from_db(iface_uuid)
                    if not ni:
                        LOG.with_instance(
                            inst).with_networkinterface(
                            iface_uuid).error('Network interface does not exist')
                    elif ni.network_uuid not in host_networks:
                        host_networks.append(ni.network_uuid)
        else:
            # For network nodes, it's all networks
            for n in net.Networks([baseobject.active_states_filter]):
                host_networks.append(n.uuid)

        # Ensure we are on every network we have a host for
        for network in host_networks:
            try:
                n = net.Network.from_db(network)
                if not n:
                    continue

                # If this network is in state delete_wait, then we should remove
                # it if it has no interfaces left.
                if n.state.value == dbo.STATE_DELETE_WAIT:
                    if not networkinterface.interfaces_for_network(n):
                        LOG.with_network(n).info(
                            'Removing stray delete_wait network')
                        etcd.enqueue('networknode', DestroyNetworkTask(n.uuid))

                    # We skip maintenance on all delete_wait networks
                    continue

                # Track what vxlan ids we've seen
                seen_vxids.append(n.vxid)

                if time.time() - n.state.update_time < 60:
                    # Network state changed in the last minute, punt for now
                    continue

                if not n.is_okay():
                    if config.NODE_IS_NETWORK_NODE:
                        LOG.with_network(n).info(
                            'Recreating not okay network on network node')
                        n.create_on_network_node()

                        # If the network node was missing a network, then that implies
                        # that we also need to re-create all of the floating IPs for
                        # that network.
                        for ni in networkinterface.interfaces_for_network(n):
                            if ni.floating.get('floating_address'):
                                LOG.with_fields(
                                    {
                                        'instance': ni.instance_uuid,
                                        'networkinterface': ni.uuid,
                                        'floating': ni.floating.get('floating_address')
                                    }).info('Refloating interface')
                                n.add_floating_ip(ni.floating.get(
                                    'floating_address'), ni.ipv4)
                    else:
                        LOG.with_network(n).info(
                            'Recreating not okay network on hypervisor')
                        n.create_on_hypervisor()

                n.ensure_mesh()

            except exceptions.LockException as e:
                LOG.warning(
                    'Failed to acquire lock while maintaining networks: %s' % e)
            except exceptions.DeadNetwork as e:
                LOG.with_field('exception', e).info(
                    'maintain_network attempted on dead network')
            except processutils.ProcessExecutionError as e:
                LOG.error('Network maintenance failure: %s', e)

        # Determine if there are any extra vxids
        extra_vxids = set(vxid_to_mac.keys()) - set(seen_vxids)

        # We keep a global cache of extra vxlans we've seen before, so that
        # we only warn about them when they've been stray for five minutes.
        global EXTRA_VLANS_HISTORY
        for vxid in EXTRA_VLANS_HISTORY.copy():
            if vxid not in extra_vxids:
                del EXTRA_VLANS_HISTORY[vxid]
        for vxid in extra_vxids:
            if vxid not in EXTRA_VLANS_HISTORY:
                EXTRA_VLANS_HISTORY[vxid] = time.time()

        # Warn of extra vxlans which have been present for more than five minutes
        for vxid in EXTRA_VLANS_HISTORY:
            if time.time() - EXTRA_VLANS_HISTORY[vxid] > 5 * 60:
                LOG.with_field('vxid', vxid).warning(
                    'Extra vxlan present!')