Ejemplo n.º 1
0
    def test_namespace_filter(self, mock_get_all):
        """Filtering on the 'gerkin' namespace yields exactly one instance.

        The instance data is supplied by the injected mock, whose setup is
        outside this method.
        """
        gerkin_only = partial(baseobject.namespace_filter, 'gerkin')
        uuids = [inst.uuid for inst in instance.Instances([gerkin_only])]

        self.assertEqual(['373a165e-9720-4e14-bd0e-9612de79ff15'], uuids)
Ejemplo n.º 2
0
    def external_view(self):
        """Return the external view of this object, including blob details.

        Starts from external_view_without_index(), merges in the most recent
        index, and then adds a 'blobs' dictionary keyed by index. Each entry
        records the blob's UUID, size and reference count, along with the
        UUIDs of the instances (those passing healthy_states_filter) which
        have a block device referring to that blob.
        """
        # Mix back in the attributes that users expect on an external view
        view = self.external_view_without_index()
        view.update(self.most_recent_index)

        # Map each blob UUID to the instances with a block device using it.
        # inst.block_devices isn't populated until the instance is created,
        # so instances which have been requested but not yet started will be
        # missed here.
        users_by_blob = defaultdict(list)
        for inst in instance.Instances([instance.healthy_states_filter]):
            for device in inst.block_devices.get('devices', []):
                users_by_blob[device.get('blob_uuid')].append(inst.uuid)

        # Describe the blob behind each index entry
        blob_details = {}
        for entry in self.get_all_indexes():
            blob_uuid = entry['blob_uuid']
            found = blob.Blob.from_db(blob_uuid)
            if not found:
                # Blobs might have a UUID listed but not yet be instantiated.
                # TODO(andy): Artifacts should not reference non-existent
                # blobs
                continue

            blob_details[entry['index']] = {
                'uuid': blob_uuid,
                'instances': users_by_blob.get(blob_uuid, []),
                'size': found.size,
                'reference_count': found.ref_count,
            }

        view['blobs'] = blob_details
        return view
Ejemplo n.º 3
0
    def test_state_filter_none(self, mock_get_all, mock_attr):
        """Filtering on the created state matches no instances in this case.

        The instance data and attributes come from the injected mocks, whose
        setup is outside this method.
        """
        created_only = partial(baseobject.state_filter, dbo.STATE_CREATED)
        uuids = [inst.uuid for inst in instance.Instances([created_only])]

        self.assertEqual([], uuids)
Ejemplo n.º 4
0
    def test_placement_filter_none(self, mock_get_all, mock_attr):
        """Filtering on placement at 'node1' matches no instances here.

        The instance data and attributes come from the injected mocks, whose
        setup is outside this method.
        """
        on_node1 = partial(instance.placement_filter, 'node1')
        uuids = [inst.uuid for inst in instance.Instances([on_node1])]

        self.assertEqual([], uuids)
Ejemplo n.º 5
0
    def test_base_iteration(self, mock_get_all):
        """Iterating with no filters yields every instance, in order."""
        expected = [
            '373a165e-9720-4e14-bd0e-9612de79ff15',
            'b078cb4e-857c-4f04-b011-751742ef5817',
            'a7c5ecec-c3a9-4774-ad1b-249d9e90e806'
        ]

        uuids = [inst.uuid for inst in instance.Instances([])]

        self.assertEqual(expected, uuids)
Ejemplo n.º 6
0
    def test_state_filter_all(self, mock_get_all, mock_attr):
        """Filtering on the created state matches all three instances here.

        The instance data and attributes come from the injected mocks, whose
        setup is outside this method.
        """
        expected = [
            '373a165e-9720-4e14-bd0e-9612de79ff15',
            'b078cb4e-857c-4f04-b011-751742ef5817',
            'a7c5ecec-c3a9-4774-ad1b-249d9e90e806'
        ]

        created_only = partial(baseobject.state_filter, dbo.STATE_CREATED)
        uuids = [inst.uuid for inst in instance.Instances([created_only])]

        self.assertEqual(expected, uuids)
Ejemplo n.º 7
0
    def test_placement_filter_all(self, mock_get_all, mock_attr):
        """Filtering on placement at 'node1' matches all three instances.

        The instance data and attributes come from the injected mocks, whose
        setup is outside this method.
        """
        expected = [
            '373a165e-9720-4e14-bd0e-9612de79ff15',
            'b078cb4e-857c-4f04-b011-751742ef5817',
            'a7c5ecec-c3a9-4774-ad1b-249d9e90e806'
        ]

        on_node1 = partial(instance.placement_filter, 'node1')
        uuids = [inst.uuid for inst in instance.Instances([on_node1])]

        self.assertEqual(expected, uuids)
Ejemplo n.º 8
0
    def instances(self):
        """List the instances which use this blob as a block device.

        Only instances passing healthy_states_filter are considered. An
        instance appears once for every one of its block devices which
        refers to this blob.

        Returns a list of instance UUIDs.
        """
        # inst.block_devices isn't populated until the instance is created,
        # so it may not be ready yet. This means we will miss instances
        # which have been requested but not yet started.
        return [
            inst.uuid
            for inst in instance.Instances([instance.healthy_states_filter])
            for device in inst.block_devices.get('devices', [])
            if device.get('blob_uuid') == self.uuid
        ]
Ejemplo n.º 9
0
def instance_delete(inst):
    """Delete an instance and tidy up the networks it was using on this node.

    While holding the instance lock this function:
      - moves the instance to delete_wait, unless it is already in one of
        the ``-error`` states;
      - powers the instance off and deletes its network interfaces;
      - deletes the instance itself; and
      - for each network the instance used, either refreshes DHCP (when
        another active instance on this node still uses the network) or
        removes the network from the hypervisor.

    Args:
        inst: the instance object to delete.
    """
    with inst.get_lock(op='Instance delete'):
        # There are two delete state flows:
        #   - error transition states (preflight-error etc) to error
        #   - created to deleted
        #
        # We don't need delete_wait for the error states as they're already
        # in a transition state.
        if not inst.state.value.endswith('-error'):
            inst.state = dbo.STATE_DELETE_WAIT
        db.add_event('instance', inst.uuid, 'queued', 'delete', None, None)

        # Create list of networks used by instance. We cannot use the
        # interfaces cached in the instance here, because the instance
        # may have failed to get to the point where it populates that
        # field (an image fetch failure for example).
        instance_networks = []
        interfaces = []
        for ni in networkinterface.interfaces_for_instance(inst):
            if ni:
                interfaces.append(ni)
                if ni.network_uuid not in instance_networks:
                    instance_networks.append(ni.network_uuid)

        # Stop the instance
        inst.power_off()

        # Delete the instance's interfaces
        with util_general.RecordedOperation('release network addresses', inst):
            for ni in interfaces:
                ni.delete()

        # Create list of networks used by all other instances
        host_networks = []
        for i in instance.Instances(
                [instance.this_node_filter, instance.active_states_filter]):
            if i.uuid == inst.uuid:
                continue

            # Walk the *other* instance's interfaces. Walking the interfaces
            # of the instance being deleted would never tell us what anyone
            # else is using. The interface cache may be empty for an
            # instance which is still being created, so tolerate that.
            for iface_uuid in i.interfaces or []:
                ni = networkinterface.NetworkInterface.from_db(iface_uuid)
                if ni and ni.network_uuid not in host_networks:
                    host_networks.append(ni.network_uuid)

        inst.delete()

        # Check each network used by the deleted instance
        for network in instance_networks:
            n = net.Network.from_db(network)
            if not n:
                continue

            if network in host_networks:
                # Network is used by another instance, so only update it
                if n.state.value == dbo.STATE_DELETE_WAIT:
                    # Do not update a network about to be deleted
                    continue
                with util_general.RecordedOperation(
                        'deallocate ip address', inst):
                    n.update_dhcp()
            else:
                # Network not used by any other instance therefore delete
                with util_general.RecordedOperation(
                        'remove network from node', n):
                    n.delete_on_hypervisor()
Ejemplo n.º 10
0
    def test_state_filter_active(self, mock_get_all, mock_attr):
        """Only one instance should pass active_states_filter in this case.

        The instance data and attributes come from the injected mocks, whose
        setup is outside this method.
        """
        active = instance.Instances([instance.active_states_filter])
        uuids = [inst.uuid for inst in active]

        self.assertEqual(['a7c5ecec-c3a9-4774-ad1b-249d9e90e806'], uuids)
Ejemplo n.º 11
0
    def run(self):
        """Supervise one trigger observer process per created local instance.

        Each pass of the loop reaps observers which have exited, starts an
        observer for every instance on this node in the created state which
        lacks one, and kills observers whose instance is no longer in that
        set. The loop waits up to one second on the exit event between
        passes. Once the exit event is set, all remaining observers are
        killed.
        """
        LOG.info('Starting')
        observers = {}

        while not self.exit.is_set():
            # Reap observers which have exited. Iterate over a snapshot of
            # the keys because entries are removed as we go.
            for instance_uuid in list(observers):
                proc = observers[instance_uuid]
                if proc.is_alive():
                    continue

                proc.join(1)
                LOG.with_instance(instance_uuid).info(
                    'Trigger observer has terminated')
                db.add_event('instance', instance_uuid, 'trigger monitor',
                             'crashed', None, None)
                del observers[instance_uuid]

            # Audit desired observers: every observer is assumed surplus
            # until we see its instance in the listing below.
            extra_instances = list(observers)
            missing_instances = []

            with etcd.ThreadLocalReadOnlyCache():
                wanted = instance.Instances([
                    instance.this_node_filter,
                    partial(baseobject.state_filter,
                            [instance.Instance.STATE_CREATED])
                ])
                for inst in wanted:
                    uuid = inst.uuid
                    if uuid in extra_instances:
                        extra_instances.remove(uuid)

                    if uuid not in observers:
                        missing_instances.append(uuid)

            # Start an observer for each instance lacking one
            for instance_uuid in missing_instances:
                console_path = os.path.join(
                    config.STORAGE_PATH, 'instances', instance_uuid,
                    'console.log')
                proc_name = '%s-%s' % (
                    daemon.process_name('triggers'), instance_uuid)
                proc = multiprocessing.Process(
                    target=observe,
                    args=(console_path, instance_uuid),
                    name=proc_name)
                proc.start()

                observers[instance_uuid] = proc
                LOG.with_instance(instance_uuid).info(
                    'Started trigger observer')
                db.add_event('instance', instance_uuid, 'trigger monitor',
                             'started', None, None)

            # Kill observers whose instance is no longer wanted. Failures to
            # kill or join are deliberately ignored.
            for instance_uuid in extra_instances:
                proc = observers[instance_uuid]
                try:
                    os.kill(proc.pid, signal.SIGKILL)
                    proc.join(1)
                except Exception:
                    pass

                del observers[instance_uuid]
                LOG.with_instance(instance_uuid).info(
                    'Finished trigger observer')
                db.add_event('instance', instance_uuid, 'trigger monitor',
                             'finished', None, None)

            self.exit.wait(1)

        # No longer running, clean up all trigger daemons
        for proc in observers.values():
            os.kill(proc.pid, signal.SIGKILL)
Ejemplo n.º 12
0
    def _maintain_networks(self):
        """Reconcile the networks present on this node with those expected.

        Works out which networks should exist here: every network passing
        active_states_filter when this is the network node, otherwise only
        the networks used by active instances on this node. For each such
        network this then:
          - queues removal of delete_wait networks with no interfaces left,
            and otherwise skips delete_wait networks;
          - skips networks whose state changed in the last minute;
          - recreates networks which are not okay; and
          - ensures the mesh.

        Finally, vxlans discovered on the node which match no maintained
        network are tracked in EXTRA_VLANS_HISTORY and warned about once
        they have been stray for more than five minutes.
        """
        LOG.info('Maintaining networks')

        # Discover what networks are present
        _, _, vxid_to_mac = util_network.discover_interfaces()

        # Determine what networks we should be on
        host_networks = []
        seen_vxids = []

        if not config.NODE_IS_NETWORK_NODE:
            # For normal nodes, just the ones we have instances for. We need
            # to use the more expensive interfaces_for_instance() method of
            # looking up instance interfaces here if the instance cache hasn't
            # been populated yet (i.e. the instance is still being created)
            for inst in instance.Instances([instance.this_node_filter,
                                            instance.active_states_filter]):
                ifaces = inst.interfaces
                if not ifaces:
                    # interfaces_for_instance() hands back interface objects
                    # whereas the cached list holds interface UUIDs.
                    # Normalise to UUIDs so that the from_db() lookup below
                    # is given what it expects, and skip any empty entries.
                    ifaces = [
                        ni.uuid
                        for ni in
                        networkinterface.interfaces_for_instance(inst)
                        if ni
                    ]

                for iface_uuid in ifaces:
                    ni = networkinterface.NetworkInterface.from_db(iface_uuid)
                    if not ni:
                        LOG.with_instance(
                            inst).with_networkinterface(
                            iface_uuid).error('Network interface does not exist')
                    elif ni.network_uuid not in host_networks:
                        host_networks.append(ni.network_uuid)
        else:
            # For network nodes, its all networks
            for n in net.Networks([baseobject.active_states_filter]):
                host_networks.append(n.uuid)

        # Ensure we are on every network we have a host for
        for network in host_networks:
            try:
                n = net.Network.from_db(network)
                if not n:
                    continue

                # If this network is in state delete_wait, then we should remove
                # it if it has no interfaces left.
                if n.state.value == dbo.STATE_DELETE_WAIT:
                    # Materialise the interfaces before testing for
                    # emptiness. If interfaces_for_network() is a generator
                    # then a bare truthiness test would always claim that
                    # interfaces remain, and the network would never be
                    # removed.
                    if not list(networkinterface.interfaces_for_network(n)):
                        LOG.with_network(n).info(
                            'Removing stray delete_wait network')
                        etcd.enqueue('networknode', DestroyNetworkTask(n.uuid))

                    # We skip maintenance on all delete_wait networks
                    continue

                # Track what vxlan ids we've seen
                seen_vxids.append(n.vxid)

                if time.time() - n.state.update_time < 60:
                    # Network state changed in the last minute, punt for now
                    continue

                if not n.is_okay():
                    if config.NODE_IS_NETWORK_NODE:
                        LOG.with_network(n).info(
                            'Recreating not okay network on network node')
                        n.create_on_network_node()

                        # If the network node was missing a network, then that implies
                        # that we also need to re-create all of the floating IPs for
                        # that network.
                        for ni in networkinterface.interfaces_for_network(n):
                            if ni.floating.get('floating_address'):
                                LOG.with_fields(
                                    {
                                        'instance': ni.instance_uuid,
                                        'networkinterface': ni.uuid,
                                        'floating': ni.floating.get('floating_address')
                                    }).info('Refloating interface')
                                n.add_floating_ip(ni.floating.get(
                                    'floating_address'), ni.ipv4)
                    else:
                        LOG.with_network(n).info(
                            'Recreating not okay network on hypervisor')
                        n.create_on_hypervisor()

                n.ensure_mesh()

            except exceptions.LockException as e:
                LOG.warning(
                    'Failed to acquire lock while maintaining networks: %s' % e)
            except exceptions.DeadNetwork as e:
                LOG.with_field('exception', e).info(
                    'maintain_network attempted on dead network')
            except processutils.ProcessExecutionError as e:
                LOG.error('Network maintenance failure: %s', e)

        # Determine if there are any extra vxids
        extra_vxids = set(vxid_to_mac.keys()) - set(seen_vxids)

        # We keep a global cache of extra vxlans we've seen before, so that
        # we only warn about them when they've been stray for five minutes.
        global EXTRA_VLANS_HISTORY
        # Iterate over a copy because entries are deleted as we go
        for vxid in EXTRA_VLANS_HISTORY.copy():
            if vxid not in extra_vxids:
                del EXTRA_VLANS_HISTORY[vxid]
        for vxid in extra_vxids:
            if vxid not in EXTRA_VLANS_HISTORY:
                EXTRA_VLANS_HISTORY[vxid] = time.time()

        # Warn of extra vxlans which have been present for more than five minutes
        for vxid in EXTRA_VLANS_HISTORY:
            if time.time() - EXTRA_VLANS_HISTORY[vxid] > 5 * 60:
                LOG.with_field('vxid', vxid).warning(
                    'Extra vxlan present!')
Ejemplo n.º 13
0
def restore_instances():
    """Re-create this node's networks and instances on daemon restart.

    Runs in three phases:
      1. For each instance on this node passing healthy_states_filter,
         collect the networks its interfaces use and add any interface
         UUIDs missing from the instance's cached interface list.
      2. Re-create each collected network on the hypervisor and ensure its
         mesh, skipping networks which are missing or dead.
      3. Re-create each instance on the hypervisor if its power state says
         it should be running.

    Failures in phases two and three are reported via
    util_general.ignore_exception() rather than propagated. An instance
    which fails to restore is also queued for deletion.
    """
    # Ensure all instances for this node are defined and have up to date data.
    networks = []
    instances = []
    for inst in instance.Instances([instance.this_node_filter,
                                    instance.healthy_states_filter]):
        instance_problems = []

        # Work on a copy so that the list handed back by the instance is
        # only changed through the explicit assignment below.
        inst_interfaces = list(inst.interfaces or [])
        updated_interfaces = False

        for ni in interfaces_for_instance(inst):
            if ni.network_uuid not in networks:
                networks.append(ni.network_uuid)
            if ni.uuid not in inst_interfaces:
                inst_interfaces.append(ni.uuid)
                updated_interfaces = True

        # We do not need a lock here because this loop only runs on the node
        # with the instance, and interfaces don't change post instance
        # creation.
        if updated_interfaces:
            inst.interfaces = inst_interfaces

        # TODO(mikal): do better here.
        # for disk in inst.disk_spec:
        #     if disk.get('base'):
        #         img = images.Image.new(disk['base'])
        #         # NOTE(mikal): this check isn't great -- it checks for the original
        #         # downloaded image, not the post transcode version
        #         if (img.state in [dbo.STATE_DELETED, dbo.STATE_ERROR] or
        #                 not os.path.exists(img.version_image_path())):
        #             instance_problems.append(
        #                 '%s missing from image cache' % disk['base'])
        #             img.delete()

        # NOTE: nothing currently populates instance_problems (see the TODO
        # above), so every instance is presently queued for restoration.
        if instance_problems:
            inst.enqueue_delete_due_error(
                'instance bad on startup: %s' % '; '.join(instance_problems))
        else:
            instances.append(inst)

    with util_general.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.Network.from_db(network)
                # from_db() returns nothing for a network which no longer
                # exists; there is nothing to restore in that case.
                if n and not n.is_dead():
                    LOG.with_object(n).info('Restoring network')
                    n.create_on_hypervisor()
                    n.ensure_mesh()
            except Exception as e:
                util_general.ignore_exception(
                    'restore network %s' % network, e)

    with util_general.RecordedOperation('restore instances', None):
        for inst in instances:
            try:
                with inst.get_lock(ttl=120, timeout=120, op='Instance restore'):
                    started = ['on', 'transition-to-on',
                               instance.Instance.STATE_INITIAL, 'unknown']
                    if inst.power_state not in started:
                        continue

                    LOG.with_object(inst).info('Restoring instance')
                    inst.create_on_hypervisor()
            except Exception as e:
                util_general.ignore_exception(
                    'restore instance %s' % inst, e)
                # Call the same method used for bad instances above. The
                # previous inst.etcd.enqueue_delete_due_error() spelling was
                # inconsistent with it, and a failure raised from inside
                # this handler would abort restoring the remaining
                # instances.
                inst.enqueue_delete_due_error(
                    'exception while restoring instance on daemon restart')