Example #1
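The example below shows two methods taken from what appears to be the manager's storage plugin code: learn_failed_lustre_tunefs_secondary_lvm_mounts, which matches LVM LVs reported by each host against existing ManagedTarget records and learns any secondary target mounts, and agent_session_start, which rebuilds a host's block-device resource graph (SCSI devices, device nodes, LVM, multipath, MD RAID, ZFS, partitions and local mounts) from an agent device report.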
    def learn_failed_lustre_tunefs_secondary_lvm_mounts(self):
        # For each known host, match the LVs it reports against existing
        # ManagedTargets and record any secondary target mounts discovered.
        for host, host_data in self.all_hosts_data.items():
            devices = get_devices(host.fqdn, timeout=30)
            for vgname in devices["lvs"]:
                for lvname in devices["lvs"][vgname]:
                    lv = devices["lvs"][vgname][lvname]
                    targets = ManagedTarget.objects.filter(uuid=lv["uuid"])
                    if not targets.count():
                        log.warning(
                            "Ignoring lv {}, no matching ManagedTarget".format(
                                lv["uuid"]))
                        continue
                    for target in targets:
                        try:
                            log.info("Target %s seen on %s" % (target, host))
                            volumenode = self._get_volume_node(
                                host, self.ha_targets[lv["uuid"]]["paths"])
                            tm, created = ManagedTargetMount.objects.get_or_create(
                                target=target,
                                host=host,
                                volume_node=volumenode)
                            if created:
                                tm.save()
                                log.info(
                                    "Learned association %d between %s and host %s"
                                    % (tm.id, lv["name"], host))
                                self._learn_event(host, tm)
                                ObjectCache.add(ManagedTargetMount, tm)
                        except Exception as e:
                            log.error("Could not create target %s on %s: %s" %
                                      (target, host, e))
    def agent_session_start(self, host_id, data, initial_scan=True):
        with transaction.atomic():
            initiate_device_poll = False
            reported_device_node_paths = []

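            # Look up the host's FQDN and fetch its latest device report.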
            fqdn = ManagedHost.objects.get(id=host_id).fqdn
            devices = get_devices(fqdn)

            # use info from IML 4.0
            if not devices and data:
                devices = data

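            # Make sure every expected device category key is present,
            # defaulting missing ones to empty dicts so the code below can
            # iterate safely.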
            for expected_item in [
                    "vgs", "lvs", "zfspools", "zfsdatasets", "devs",
                    "local_fs", "mds", "mpath"
            ]:
                if expected_item not in devices.keys():
                    devices[expected_item] = {}

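            # Serialize the raw device list and bail out early if nothing has
            # changed since the last report we processed.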
            dev_json = json.dumps(devices["devs"], sort_keys=True)

            if dev_json == self.current_devices:
                return None

            log.debug("Linux.devices changed on {}: {}".format(
                fqdn,
                set(json.loads(self.current_devices).keys()) -
                set(devices["devs"].keys())))

            self.current_devices = dev_json

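            # Collect the block devices that back LVM LVs, multipath maps and
            # MD arrays; these are handled separately from plain SCSI devices.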
            lv_block_devices = set()
            for vg, lv_list in devices["lvs"].items():
                for lv_name, lv in lv_list.items():
                    try:
                        lv_block_devices.add(lv["block_device"])
                    except KeyError:
                        # An inactive LV has no block device
                        pass

            mpath_block_devices = set()
            for mp_name, mp in devices["mpath"].items():
                mpath_block_devices.add(mp["block_device"])

            special_block_devices = lv_block_devices | mpath_block_devices

            for uuid, md_info in devices["mds"].items():
                special_block_devices.add(md_info["block_device"])

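            # ZFS pools and datasets arrive without the usual block device
            # attributes, so fill in placeholders before the generic handling
            # below.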
            def add_zfs(zfs_info):
                # add attributes not specific to zfs instances
                bdid = zfs_info["block_device"]
                special_block_devices.add(bdid)
                dev = devices["devs"][bdid]
                dev["major_minor"] = bdid
                dev["parent"] = None
                dev["serial_80"] = None
                dev["serial_83"] = None
                dev["filesystem_type"] = "zfs" if bdid.startswith(
                    "zfsset") else None

            for uuid, zfs_info in merge(devices["zfspools"],
                                        devices["zfsdatasets"]).items():
                add_zfs(zfs_info)

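            # Return the most preferred serial number that is set on the device.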
            def preferred_serial(bdev):
                for attr in SERIAL_PREFERENCE:
                    if bdev[attr]:
                        return bdev[attr]
                return None

            # Scrub dodgy QEMU SCSI IDs
            for bdev in devices["devs"].values():
                qemu_pattern = "QEMU HARDDISK"
                if bdev["serial_80"] and bdev["serial_80"].find(
                        qemu_pattern) != -1:
                    # Virtual environments can set an ID that trails QEMU HARDDISK, in which case
                    # we should pick that up, or this might not be a real ID at all.
                    # We have seen at least "SQEMU    QEMU HARDDISK" and "SQEMU    QEMU HARDDISK  0"
                    # for devices without manually set IDs, so apply a general condition that the trailing
                    # portion must be more than N characters for us to treat it like an ID
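                    # For example, "SQEMU    QEMU HARDDISK" yields an empty
                    # trailing portion (discarded), while a manually assigned ID
                    # such as "QEMU HARDDISK  disk-1234" yields "disk-1234"
                    # (kept). The second value is purely illustrative.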
                    trailing_id = bdev["serial_80"].split(
                        qemu_pattern)[1].strip()
                    if len(trailing_id) < 4:
                        bdev["serial_80"] = None
                    else:
                        bdev["serial_80"] = trailing_id
                if bdev["serial_83"] and bdev["serial_83"].find(
                        qemu_pattern) != -1:
                    bdev["serial_83"] = None

            # Create ScsiDevices
            res_by_serial = {}
            scsi_device_identifiers = []

            for bdev in devices["devs"].values():
                serial = preferred_serial(bdev)
                if not bdev["major_minor"] in special_block_devices:
                    if serial is not None and serial not in res_by_serial:
                        # NB it's okay to have multiple block devices with the same
                        # serial (multipath): we just store the serial+size once
                        node, created = self.update_or_create(
                            ScsiDevice,
                            serial=serial,
                            size=bdev["size"],
                            filesystem_type=bdev["filesystem_type"])
                        res_by_serial[serial] = node
                        scsi_device_identifiers.append(node.id_tuple())

            # Map major:minor string to LinuxDeviceNode
            self.major_minor_to_node_resource = {}

            # Create DeviceNodes for ScsiDevices and UnsharedDevices
            for bdev in devices["devs"].values():
                # Partitions: we will do these in a second pass once their
                # parents are in major_minor_to_node_resource
                if bdev["parent"] is not None:
                    continue

                # Don't create ScsiDevices for devicemapper, mdraid
                if bdev["major_minor"] in special_block_devices:
                    continue

                serial = preferred_serial(bdev)
                if serial is not None:
                    # Serial is set, so look up the ScsiDevice
                    lun_resource = res_by_serial[serial]
                    node, created = self.update_or_create(
                        LinuxDeviceNode,
                        parents=[lun_resource],
                        logical_drive=lun_resource,
                        host_id=host_id,
                        path=bdev["path"],
                    )
                    self.major_minor_to_node_resource[
                        bdev["major_minor"]] = node
                    reported_device_node_paths.append(bdev["path"])
                else:
                    # Serial is not set, so create an UnsharedDevice
                    device, created = self.update_or_create(
                        UnsharedDevice,
                        path=bdev["path"],
                        size=bdev["size"],
                        filesystem_type=bdev["filesystem_type"])
                    node, created = self.update_or_create(LinuxDeviceNode,
                                                          parents=[device],
                                                          logical_drive=device,
                                                          host_id=host_id,
                                                          path=bdev["path"])
                    self.major_minor_to_node_resource[
                        bdev["major_minor"]] = node
                    reported_device_node_paths.append(bdev["path"])

            # Okay, now we've got ScsiDeviceNodes, time to build the devicemapper ones
            # on top of them.  These can come in any order and be nested to any depth.
            # So we have to build a graph and then traverse it to populate our resources.
            for bdev in devices["devs"].values():
                if bdev["major_minor"] in lv_block_devices:
                    node, created = self.update_or_create(LinuxDeviceNode,
                                                          host_id=host_id,
                                                          path=bdev["path"])
                elif bdev["major_minor"] in mpath_block_devices:
                    node, created = self.update_or_create(LinuxDeviceNode,
                                                          host_id=host_id,
                                                          path=bdev["path"])
                elif bdev["parent"]:
                    node, created = self.update_or_create(LinuxDeviceNode,
                                                          host_id=host_id,
                                                          path=bdev["path"])
                else:
                    continue

                self.major_minor_to_node_resource[bdev["major_minor"]] = node
                reported_device_node_paths.append(bdev["path"])

            # Finally remove any of the scsi devs that are no longer present.
            initiate_device_poll |= self.remove_missing_devices(
                host_id, ScsiDevice, scsi_device_identifiers)

            # Now all the LUNs and device nodes are in, create the links between
            # the DM block devices and their parent entities.
            vg_uuid_to_resource = {}
            for vg in devices["vgs"].values():
                # Create VG resource
                vg_resource, created = self.update_or_create(LvmGroup,
                                                             uuid=vg["uuid"],
                                                             name=vg["name"],
                                                             size=vg["size"])
                vg_uuid_to_resource[vg["uuid"]] = vg_resource

                # Add PV block devices as parents of VG
                for pv_bdev in vg["pvs_major_minor"]:
                    if pv_bdev in self.major_minor_to_node_resource:
                        vg_resource.add_parent(
                            self.major_minor_to_node_resource[pv_bdev])

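            # Create an LvmVolume for each LV, parented on its VG, and attach
            # it to the LV's device node on this host.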
            for vg, lv_list in devices["lvs"].items():
                for lv_name, lv in lv_list.items():
                    vg_info = devices["vgs"][vg]
                    vg_resource = vg_uuid_to_resource[vg_info["uuid"]]

                    # Make the LV a parent of its device node on this host
                    lv_resource, created = self.update_or_create(
                        LvmVolume,
                        parents=[vg_resource],
                        uuid=lv["uuid"],
                        name=lv["name"],
                        vg=vg_resource,
                        size=lv["size"],
                        filesystem_type=devices["devs"][lv["block_device"]]
                        ["filesystem_type"],
                    )

                    try:
                        lv_node = self.major_minor_to_node_resource[
                            lv["block_device"]]
                        lv_node.logical_drive = lv_resource
                        lv_node.add_parent(lv_resource)
                    except KeyError:
                        # Inactive LVs have no block device
                        pass

            for mpath_alias, mpath in devices["mpath"].items():
                # Devices contributing to the multipath
                mpath_parents = [
                    self.major_minor_to_node_resource[n["major_minor"]]
                    for n in mpath["nodes"]
                ]
                # The multipath device node
                mpath_node = self.major_minor_to_node_resource[
                    mpath["block_device"]]
                for p in mpath_parents:
                    # All the mpath_parents should have the same logical_drive
                    mpath_node.logical_drive = mpath_parents[0].logical_drive
                    mpath_node.add_parent(p)

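            # Map MD RAID, ZFS pool and ZFS dataset devices onto their device
            # nodes; results from the ZFS mappings feed into initiate_device_poll.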
            self._map_drives_to_device_to_node(devices, host_id, "mds", MdRaid,
                                               [], reported_device_node_paths)

            initiate_device_poll = (self._map_drives_to_device_to_node(
                devices, host_id, "zfspools", ZfsPool, ["name"],
                reported_device_node_paths) or initiate_device_poll)

            initiate_device_poll = (self._map_drives_to_device_to_node(
                devices, host_id, "zfsdatasets", ZfsDataset, ["name"],
                reported_device_node_paths) or initiate_device_poll)

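            # Record any non-Lustre local filesystem mounts.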
            for bdev, (mntpnt, fstype) in devices["local_fs"].items():
                if fstype != "lustre":
                    bdev_resource = self.major_minor_to_node_resource[bdev]
                    self.update_or_create(LocalMount,
                                          parents=[bdev_resource],
                                          mount_point=mntpnt,
                                          fstype=fstype)

            # Create Partitions (devices that have 'parent' set)
            partition_identifiers = []

            for bdev in [x for x in devices["devs"].values() if x["parent"]]:
                this_node = self.major_minor_to_node_resource[
                    bdev["major_minor"]]
                parent_resource = self.major_minor_to_node_resource[
                    bdev["parent"]]

                if not parent_resource.logical_drive:
                    raise RuntimeError("Parent %s of %s has no logical drive" %
                                       (parent_resource, bdev))

                partition, created = self.update_or_create(
                    # ZfsPartitions should be differentiated as they are not usable for lustre
                    ZfsPartition if bdev.get("is_zfs_reserved")
                    or bdev["filesystem_type"] == "zfs_member" else Partition,
                    parents=[parent_resource],
                    container=parent_resource.logical_drive,
                    number=bdev["partition_number"],
                    size=bdev["size"],
                    filesystem_type=bdev["filesystem_type"],
                )

                this_node.add_parent(partition)
                partition_identifiers.append(partition.id_tuple())

            # Finally remove any of the partitions that are no longer present.
            initiate_device_poll |= self.remove_missing_devices(
                host_id, Partition, partition_identifiers)

            initiate_device_poll |= self.remove_missing_devices(
                host_id, ZfsPartition, partition_identifiers)

            initiate_device_poll |= self.remove_missing_devicenodes(
                reported_device_node_paths)

        # If we see a device change and the data was sent by an agent poll rather than the
        # initial start-up, then we need to make all of the HA peer agents, and any other
        # nodes we share VolumeNodes with, re-poll themselves.
        # This 'set' is probably a good balance between polling every node and no poll at all.
        if not initial_scan and initiate_device_poll:
            ha_peers = set(
                HaCluster.host_peers(ManagedHost.objects.get(id=host_id)))

            hosts_volume_node_ids = [
                volume_node.volume_id
                for volume_node in VolumeNode.objects.filter(host_id=host_id)
            ]
            all_volume_nodes = list(
                VolumeNode.objects.filter(volume_id__in=hosts_volume_node_ids))
            all_volume_node_hosts = ManagedHost.objects.filter(id__in=set(
                volume_node.host_id for volume_node in all_volume_nodes))

            ha_peers |= set(all_volume_node_hosts)
            JobSchedulerClient.trigger_plugin_update(
                [peer.id for peer in ha_peers], [host_id], ["linux"])