Esempio n. 1
0
    def register(self, address=None):
        """Register this agent with the manager.

        :param address: externally-reachable address to report, or None to
            let the manager infer it.
        :return: the manager's response to the registration POST.
        :raises RuntimeError: if the FQDN resolves to localhost.localdomain
            (name resolution is broken, so registration cannot succeed).
        :raises HttpError: if the registration POST fails.
        """
        # Fail fast on broken name resolution BEFORE building the payload:
        # generating the CSR below involves key operations we should not
        # waste on a registration that is guaranteed to be rejected.
        if self._fqdn == "localhost.localdomain":
            console_log.error(
                "Registration failed, FQDN is localhost.localdomain")
            raise RuntimeError(
                "Name resolution error, FQDN resolves to localhost.localdomain"
            )

        # FIXME: At this time the 'capabilities' attribute is unused on the manager
        data = {
            "address": address,
            "fqdn": self._fqdn,
            "nodename": self._nodename,
            "capabilities": self.action_plugins.capabilities,
            "version": version(),
            "csr": self._crypto.generate_csr(self._fqdn),
        }

        # TODO: during registration, we should already have the authority certificate
        # so we should establish an HTTPS connection (no client cert) with the
        # manager, and verify that the manager's certificate is signed and for
        # an address matching self.url

        try:
            result = self.post(data)
        except HttpError:
            console_log.error("Registration failed to %s with request %s" %
                              (self.url, data))
            raise
        else:
            return result
Esempio n. 2
0
def reregister_server(url, address):
    """ Update manager url and register agent address with manager """
    # Stop the running service before touching its configuration.
    if _service_is_running() is True:
        console_log.warning(
            "chroma-agent service was running before registration, stopping.")
        agent_service.stop()

    # Persist the new manager URL, then build a client aimed at it.
    config.set('settings', 'server', {'url': url})
    client = AgentClient(url + 'reregister/', ActionPluginManager(),
                         DevicePluginManager(), ServerProperties(),
                         Crypto(config.path))

    payload = {'address': address, 'fqdn': client._fqdn}
    try:
        result = client.post(payload)
    except HttpError:
        console_log.error("Reregistration failed to %s with request %s" %
                          (client.url, payload))
        raise

    console_log.info("Starting chroma-agent service")
    agent_service.start()

    return result
Esempio n. 3
0
def reregister_server(url, address):
    """ Update manager url and register agent address with manager """
    # Make sure the agent is not running while its settings are rewritten.
    if _service_is_running() is True:
        console_log.warning(
            "chroma-agent service was running before registration, stopping.")
        agent_service.stop()

    # Record the new manager URL and build a client for the reregister
    # endpoint using the on-disk credentials.
    conf.set_server_url(url)
    client = AgentClient(
        url + "reregister/",
        ActionPluginManager(),
        DevicePluginManager(),
        ServerProperties(),
        Crypto(conf.ENV_PATH),
    )

    payload = {"address": address, "fqdn": client._fqdn}
    try:
        result = client.post(payload)
    except HttpError:
        console_log.error("Reregistration failed to %s with request %s" %
                          (client.url, payload))
        raise

    console_log.info("Starting chroma-agent service")
    agent_service.start()

    return result
Esempio n. 4
0
def import_target(device_type,
                  path,
                  pacemaker_ha_operation,
                  validate_importable=False):
    """
    Import a device where the concept applies to its type: zfs pools support
    import, a plain jbod scsi disk does not.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: the import was requested by pacemaker; in
               HA operations the device was often not cleanly exported because
               the previously mounted node failed while running.
    :param validate_importable: only check that the device can be imported,
               without leaving it imported: the device is imported and then
               exported again, checking for errors.
    :return: None or an Error message
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    # A forced retry is only appropriate for pacemaker-driven HA imports,
    # where the previous owner may have died without exporting cleanly.
    if error and pacemaker_ha_operation and ('-f' in error):
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)

    if (error is None) and (validate_importable is True):
        error = blockdevice.export()

        if error:
            console_log.error("Error exporting pool: '%s'" % error)

    return agent_ok_or_error(error)
Esempio n. 5
0
def _configure_target_ha(ha_label, info, enabled=False):
    """Create the pacemaker resources for a Lustre target via 'pcs'.

    :param ha_label: pacemaker resource id for the Lustre resource.
    :param info: dict describing the target; this code reads 'bdev',
        'device_type' and 'mntpt'.
    :param enabled: when True the resources start immediately and this call
        waits for them; when False they are created with --disabled.
    :return: result of the last 'pcs' invocation (rc 0 on success), except
        on the zfs start-timeout path which returns a plain dict -- see the
        NOTE below.
    """
    if enabled:
        extra = []
    else:
        extra = ['--disabled']

    bdev = info['bdev']

    if info['device_type'] == 'zfs':
        # The zpool must be imported before the Lustre resource can mount:
        # put both resources in one pcs group so they start in order.
        extra += ['--group', _group_name(ha_label)]
        # The pool name is the path component before the first '/'.
        zpool = info['bdev'].split("/")[0]
        result = AgentShell.run([
            'pcs', 'resource', 'create',
            _zfs_name(ha_label), 'ocf:chroma:ZFS', 'pool={}'.format(zpool),
            'op', 'start', 'timeout=120', 'op', 'stop', 'timeout=90'
        ] + extra)
        if result.rc != 0:
            console_log.error("Resource (%s) create failed:%d: %s", zpool,
                              result.rc, result.stderr)
            return result

        if enabled and not _wait_target(_zfs_name(ha_label), True):
            # NOTE(review): this path returns a dict, while every other path
            # returns the AgentShell.run result object -- callers must only
            # rely on the shared 'rc'/'stdout'/'stderr' fields.
            return {
                "rc":
                -1,
                "stdout":
                "",
                "stderr":
                "ZFS Resource ({}) failed to start".format(_zfs_name(ha_label))
            }

    else:
        # This is a hack for ocf:lustre:Lustre up to Lustre 2.10.5/2.11 see LU-11461
        result = AgentShell.run(['realpath', info['bdev']])
        if result.rc == 0 and result.stdout.startswith('/dev/sd'):
            bdev = result.stdout.strip()

    # Create Lustre resource and add target=uuid as an attribute
    result = AgentShell.run([
        'pcs', 'resource', 'create', ha_label, 'ocf:lustre:Lustre',
        'target={}'.format(bdev), 'mountpoint={}'.format(
            info['mntpt']), 'op', 'start', 'timeout=600'
    ] + extra)

    # Treat a successful create that then fails to start as a failure too.
    if result.rc != 0 or enabled and not _wait_target(ha_label, True):
        if result.rc == 0:
            result.rc = -1
            result.stderr = "Resource ({}) failed to start".format(ha_label)

        console_log.error("Failed to create resource %s:%d: %s", ha_label,
                          result.rc, result.stderr)

        # Roll back the ZFS half of the group so we don't leave it behind.
        if info['device_type'] == 'zfs':
            AgentShell.run(['pcs', 'resource', 'delete', _zfs_name(ha_label)])

    return result
Esempio n. 6
0
def scan_packages():
    """
    Interrogate the packages available from configured repositories, and the installation
    status of those packages.
    """

    # Look up what repos are configured
    # =================================
    if not os.path.exists(REPO_PATH):
        return None

    cp = ConfigParser.SafeConfigParser()
    cp.read(REPO_PATH)
    repo_names = sorted(cp.sections())
    repo_packages = dict([(name,
                           defaultdict(lambda: {
                               'available': [],
                               'installed': []
                           })) for name in repo_names])

    # For all repos, enumerate packages in the repo in alphabetic order
    # =================================================================
    yum_util('clean', fromrepo=repo_names)

    # For all repos, query packages in alphabetical order
    # ===================================================
    for repo_name in repo_names:
        packages = repo_packages[repo_name]
        try:
            stdout = yum_util('repoquery', fromrepo=[repo_name])

            # Returning nothing means the package was not found at all and so we have no data to deliver back.
            if stdout:
                for line in [l.strip() for l in stdout.strip().split("\n")]:
                    if line.startswith("Last metadata expiration check") or \
                       line.startswith("Waiting for process with pid"):
                        continue
                    epoch, name, version, release, arch = line.split()
                    if arch == "src":
                        continue
                    packages[name]['available'].append(
                        VersionInfo(epoch=epoch,
                                    version=version,
                                    release=release,
                                    arch=arch))
        except ValueError, e:
            console_log.error("bug HYD-2948. repoquery Output: %s" % (stdout))
            raise e
        except RuntimeError, e:
            # This is a network operation, so cope with it failing
            daemon_log.error(e)
            return None
Esempio n. 7
0
def export_target(device_type, path):
    """
    Export a device where the concept applies to its type: zfs pools support
    export, a plain jbod scsi disk does not.

    :param path: path of device to export
    :param device_type: the type of device to export
    :return: None or an Error message
    """
    error = BlockDevice(device_type, path).export()

    if error:
        console_log.error("Error exporting pool: '%s'" % error)

    return agent_ok_or_error(error)
Esempio n. 8
0
def _configure_target_ha(ha_label, info, enabled=False):
    """Create the pacemaker resources for a Lustre target via the CIB.

    Builds a <primitive> for the Lustre resource; for zfs targets it is
    wrapped in a <group> together with a ZFS resource so the zpool is
    imported before the filesystem starts.

    :param ha_label: pacemaker resource id for the Lustre resource.
    :param info: dict describing the target; reads 'bdev', 'mntpt' and
        'device_type'.
    :param enabled: when True the resource starts immediately and this call
        waits for it; when False a target_role=Stopped meta attribute leaves
        it disabled.
    :return: an AgentShell.RunResult (rc 0 on success).
    """
    # NOTE: unlike the pcs-based variant there is no '--disabled' flag here;
    # the disabled state is expressed via the target_role meta attribute
    # below, so the old unused 'extra' list has been removed.
    xmlid = ha_label
    res = _resource_xml(
        ha_label,
        "ocf:lustre:Lustre",
        {
            "target": info["bdev"],
            "mountpoint": info["mntpt"]
        },
    )

    if info["device_type"] == "zfs":
        xmlid = _group_name(ha_label)
        grp = ET.Element("group", {"id": xmlid})
        # The pool name is the path component before the first '/'.
        zpool = info["bdev"].split("/")[0]
        grp.append(
            _resource_xml(_zfs_name(ha_label), "ocf:chroma:ZFS",
                          {"pool": zpool}))
        grp.append(res)
        res = grp

    if not enabled:
        # Create the resource stopped; pacemaker will not start it until the
        # target_role attribute is cleared.
        meta = ET.SubElement(res, "meta_attributes",
                             {"id": "{}-{}".format(xmlid, "meta_attributes")})
        _nvpair_xml(meta, "target_role", "Stopped")

    # Create Lustre resource and add target=uuid as an attribute
    result = cibcreate("resources", ET.tostring(res))

    # Treat a successful create that then fails to start as a failure too.
    if result.rc != 0 or enabled and not _wait_target(ha_label, True):
        if result.rc == 0:
            result = AgentShell.RunResult(
                -1, "", "Resource ({}) failed to start".format(ha_label),
                False)

        console_log.error("Failed to create resource %s:%d: %s", ha_label,
                          result.rc, result.stderr)

    return result
Esempio n. 9
0
def import_target(device_type, path, pacemaker_ha_operation):
    """
    Import a device where the concept applies to its type: zfs pools support
    import, a plain jbod scsi disk does not.

    :param device_type: the type of device to import
    :param path: path of device to import
    :param pacemaker_ha_operation: the import was requested by pacemaker; in
               HA operations the device was often not cleanly exported because
               the previously mounted node failed while running.
    :return: None or an Error message
    """
    blockdevice = BlockDevice(device_type, path)

    error = blockdevice.import_(False)

    # Retry with force only for pacemaker-driven HA operations, where the
    # previous owner may have died without exporting the device cleanly.
    if error and pacemaker_ha_operation and ('-f' in error):
        error = blockdevice.import_(True)

    if error:
        console_log.error("Error importing pool: '%s'" % error)

    return agent_ok_or_error(error)
Esempio n. 10
0
def configure_target_ha(primary, device, ha_label, uuid, mount_point):
    """
    Configure high availability for a target.

    On the primary node this creates the pacemaker resource (passed
    enabled=False, so it starts disabled); on every node it then adds the
    priority constraint for this host.

    :return: Value using simple return protocol
    """

    _mkdir_p_concurrent(mount_point)

    if primary:
        info = _get_target_config(uuid)

        if _resource_exists(ha_label):
            # Already configured: identical parameters mean nothing to do,
            # differing parameters are an error.
            if info["bdev"] == device and info["mntpt"] == mount_point:
                return agent_result_ok

            return agent_error(
                "A resource with the name {} already exists".format(ha_label))

        if info["bdev"] != device or info["mntpt"] != mount_point:
            # Proceed with the stored configuration, but record the mismatch.
            console_log.error(
                "Mismatch for %s do not match configured (%s on %s) != (%s on %s)",
                ha_label,
                device,
                mount_point,
                info["bdev"],
                info["mntpt"],
            )

        res = _configure_target_ha(ha_label, info, False)
        if res.rc != 0:
            return agent_error("Failed to create {}: {}".format(
                ha_label, res.rc))

    res = _configure_target_priority(primary, ha_label, _this_node())
    if res.rc != 0:
        return agent_error(
            "Failed to create location constraint on {}: {}".format(
                ha_label, res.rc))

    return agent_result_ok
    def _parse_dm_table(self, stdout):
        """Parse a devicemapper table listing and record LVs, LV/mpath
        partitions and multipath devices present on this node.

        Mutates self.lvs, self.vgs and self.mpaths in place, then filters
        self.lvs/self.vgs down to the components actually seen in the table
        (HYD-2431).

        :param stdout: raw table listing text; the "No devices found"
            sentinel is treated as an empty table.  (Presumably the output
            of 'dmsetup table' -- the producer is outside this block.)
        """
        if stdout.strip() == "No devices found":
            dm_lines = []
        else:
            dm_lines = [i for i in stdout.split("\n") if len(i) > 0]

        # Compose a lookup of names of multipath devices, for use
        # in parsing other lines
        multipath_names = set()
        for line in dm_lines:
            tokens = line.split()
            name = tokens[0].strip(":")
            dm_type = tokens[3]
            if dm_type == 'multipath':
                multipath_names.add(name)

        def _read_lv(block_device, lv_name, vg_name, devices):
            # Record the LV's node and fold its backing devices into the
            # VG's set of PV major:minor numbers.
            self.lvs[vg_name][lv_name]['block_device'] = block_device

            devices = [
                self.block_devices.block_device_nodes[i]['major_minor']
                for i in devices
            ]
            self.vgs[vg_name]['pvs_major_minor'] = list(
                set(self.vgs[vg_name]['pvs_major_minor']) | set(devices))

        def _read_lv_partition(block_device, parent_lv_name, vg_name):
            # HYD-744: FIXME: compose path in a way that copes with hyphens
            parent_block_device = self.block_devices.node_block_devices[
                "%s/%s-%s" %
                (BlockDevices.MAPPERPATH, vg_name, parent_lv_name)]
            self.block_devices.block_device_nodes[block_device][
                'parent'] = parent_block_device

        def _read_mpath_partition(block_device, parent_mpath_name):
            # A non-LV partition
            parent_block_device = self.block_devices.node_block_devices[
                "%s/%s" % (BlockDevices.MAPPERPATH, parent_mpath_name)]
            self.block_devices.block_device_nodes[block_device][
                'parent'] = parent_block_device

        # Make a note of which VGs/LVs are in the table so that we can
        # filter out nonlocal LVM components.
        local_lvs = set()
        local_vgs = set()

        for line in dm_lines:
            tokens = line.split()
            name = tokens[0].strip(":")
            dm_type = tokens[3]

            node_path = os.path.join(BlockDevices.MAPPERPATH, name)
            block_device = self.block_devices.node_block_devices[node_path]

            if dm_type in ['linear', 'striped']:
                # This is either an LV or a partition.
                # Try to resolve its name to a known LV, if not found then it
                # is a partition.
                # This is an LVM LV
                if dm_type == 'striped':
                    # List of striped devices
                    dev_indices = range(6, len(tokens), 2)
                    devices = [tokens[i] for i in dev_indices]
                elif dm_type == 'linear':
                    # Single device linear range
                    devices = [tokens[4]]
                else:
                    # Unreachable given the enclosing 'in' check; kept as a
                    # defensive catch-all.
                    console_log.error("Failed to parse dmsetupline '%s'" %
                                      line)
                    continue

                # To be an LV:
                #  Got to have a hyphen
                #  Got to appear in lvs dict

                # To be a partition:
                #  Got to have a (.*)p\d+$
                #  Part preceeding that pattern must be an LV or a mpath

                # Potentially confusing scenarios:
                #  A multipath device named foo-bar where there exists a VG called 'foo'
                #  An LV whose name ends "p1" like foo-lvp1
                #  NB some scenarios may be as confusing for devicemapper as they are for us, e.g.
                #  if someone creates an LV "bar" in a VG "foo", and also an mpath called "foo-bar"

                # First, let's see if it's an LV or an LV partition
                match = re.search("(.*[^-])-([^-].*)", name)
                if match:
                    vg_name, lv_name = match.groups()
                    # When a name has a "-" in it, DM prints a double hyphen in the output
                    # So for an LV called "my-lv" you get VolGroup00-my--lv
                    vg_name = vg_name.replace("--", "-")
                    lv_name = lv_name.replace("--", "-")
                    try:
                        vg_lv_info = self.lvs[vg_name]
                        local_vgs.add(vg_name)
                    except KeyError:
                        # Part before the hyphen is not a VG, so this can't be an LV
                        pass
                    else:
                        if lv_name in vg_lv_info:
                            _read_lv(block_device, lv_name, vg_name, devices)
                            local_lvs.add(lv_name)
                            continue
                        else:
                            # It's not an LV, but it matched a VG, could it be an LV partition?
                            result = re.search("(.*)p\d+", lv_name)
                            if result:
                                lv_name = result.groups()[0]
                                if lv_name in vg_lv_info:
                                    # This could be an LV partition.
                                    _read_lv_partition(block_device, lv_name,
                                                       vg_name)
                                    local_lvs.add(lv_name)
                                    continue
                else:
                    # If it isn't an LV or an LV partition, see if it looks like an mpath partition
                    result = re.search("(.*)p\d+", name)
                    if result:
                        mpath_name = result.groups()[0]
                        if mpath_name in multipath_names:
                            _read_mpath_partition(block_device, mpath_name)
                        else:
                            # Part before p\d+ is not an mpath, therefore not a multipath partition
                            pass
                    else:
                        # No trailing p\d+, therefore not a partition
                        console_log.error(
                            "Cannot handle devicemapper device %s: it doesn't look like an LV or a partition"
                            % name)
            elif dm_type == 'multipath':
                if name in self.mpaths:
                    raise RuntimeError("Duplicated mpath device %s" % name)

                major_minors = self._parse_multipath_params(tokens[4:])

                # multipath devices might reference devices that don't exist (maybe did and the removed) so
                # becareful about missing keys.
                devices = [
                    self.block_devices.block_device_nodes[major_minor]
                    for major_minor in major_minors
                    if major_minor in self.block_devices.block_device_nodes
                ]

                # Add this devices to the canonical path list.
                for device in devices:
                    ndp.add_normalized_device(
                        device['path'],
                        "%s/%s" % (BlockDevices.MAPPERPATH, name))

                self.mpaths[name] = {
                    "name": name,
                    "block_device": block_device,
                    "nodes": devices
                }
            else:
                # Other table types (snapshot, mirror, ...) are ignored.
                continue

        # Filter out nonlocal LVM components (HYD-2431)
        # NOTE: self.lvs is keyed by VG name, so the 'lv' loop variable below
        # is actually a VG name, correctly checked against local_vgs.
        self.vgs = dict([(vg, value) for vg, value in self.vgs.items()
                         if vg in local_vgs])
        self.lvs = dict([(lv, value) for lv, value in self.lvs.items()
                         if lv in local_vgs])
        for vg_name, vg_lvs in self.lvs.items():
            self.lvs[vg_name] = dict([(k, v)
                                      for k, v in self.lvs[vg_name].items()
                                      if k in local_lvs])
Esempio n. 12
0
    for res in dom.getElementsByTagName('primitive'):
        if not (res.getAttribute("provider") == "chroma"
                and res.getAttribute("type") == "Target"):
            continue

        ha_label = res.getAttribute("id")

        # _get_target_config() will raise KeyError if uuid doesn't exist locally
        # next() will raise StopIteration if it doesn't find attribute target
        try:
            info = next(
                _get_target_config(ops.getAttribute("value"))
                for ops in res.getElementsByTagName('nvpair')
                if ops.getAttribute("name") == "target")
        except Exception as err:
            console_log.error("No local info for resource: %s", ha_label)
            continue

        _unconfigure_target_priority(False, ha_label)
        _unconfigure_target_priority(True, ha_label)
        _unconfigure_target_ha(ha_label, info, True)
        _configure_target_ha(ha_label, info,
                             (active.get(ha_label) is not None))
        _configure_target_priority(True, ha_label, locations[ha_label][0])
        _configure_target_priority(False, ha_label, locations[ha_label][1])
        wait_list.append([ha_label, (active.get(ha_label) is not None)])

    # wait for last item
    for wait in wait_list:
        console_log.info("Waiting on %s", wait[0])
        _wait_target(*wait)
Esempio n. 13
0
        # (target-role:Stopped) is new.
        if "target-role" in columns[2]:
            del columns[2]

        # and even newer pacemakers add a "(disabled)" to the end of the line:
        # MGS_e1321a	(ocf::chroma:Target):	Stopped (disabled)
        if columns[3] == "(disabled)":
            columns[3] = None

        # Similar to above, the third column can report one of various
        # states such as Starting, Started, Stopping, Stopped so only
        # consider targets which are Started
        # If we still have 4 columns at this point, the third column
        # must be the state
        if columns[2] not in ['Starting', 'Started', 'Stopping', 'Stopped']:
            console_log.error("Unable to determine state of %s in\n%s'" %
                              (columns[0], lines_text))

        # a target that is "Stopping" has not completed the transistion
        # from "Started" (i.e. running) to Stopped, so count it as running
        # until it completes the transition
        if columns[2] == "Started" or columns[2] == "Stopping":
            locations[columns[0]] = columns[3]
        else:
            locations[columns[0]] = None

    return locations


def check_block_device(path, device_type):
    """
    Precursor to formatting a device: check if there is already a filesystem on it.
Esempio n. 14
0
def convert_targets(force=False):
    """
    Convert existing ocf:chroma:Target to ZFS + Lustre

    Rebuilds every chroma Target primitive found in the CIB as the newer
    resource layout, preserving its location constraints and whether it was
    running, with pacemaker in maintenance-mode for the duration.

    :param force: run even when this node is not the pacemaker DC.
    :return: None on success or early exit; a {"crm_mon_error": {...}} dict
        when the CIB cannot be queried.
    """
    try:
        result = AgentShell.run(["cibadmin", "--query"])
    except OSError as err:
        # ENOENT means cibadmin isn't installed; report it in the same
        # shape as a failed query.  Anything else is unexpected.
        if err.errno != errno.ENOENT:
            raise err
        return {
            "crm_mon_error": {
                "rc": err.errno,
                "stdout": err.message,
                "stderr": err.strerror,
            }
        }

    if result.rc != 0:
        # Pacemaker not running, or no resources configured yet
        return {
            "crm_mon_error": {
                "rc": result.rc,
                "stdout": result.stdout,
                "stderr": result.stderr,
            }
        }

    dom = ET.fromstring(result.stdout)

    this_node = _this_node()

    # node elements are numbered from 1
    # dc-uuid is the node id of the domain controller
    dcuuid = next(
        (node.get("uname") for node in dom.findall(".//node")
         if node.get("id") == dom.get("dc-uuid")),
        "",
    )

    # Only the DC performs the conversion (unless forced), so the cluster
    # converts exactly once.
    if dcuuid != this_node and not force:
        console_log.info("This is not Pacemaker DC %s this is %s", dcuuid,
                         this_node)
        return

    # Build map of resource -> [ primary node, secondary node ]
    locations = {}
    for con in dom.findall(".//rsc_location"):
        ha_label = con.get("rsc")
        if not locations.get(ha_label):
            locations[ha_label] = {}
        # Constraint ids encode primary (index 0) vs secondary (index 1).
        if con.get("id") == _constraint(ha_label, True):
            ind = 0
        elif con.get("id") == _constraint(ha_label, False):
            ind = 1
        else:
            console_log.info("Unknown constraint: %s", con.get("id"))
            continue
        locations[ha_label][ind] = con.get("node")

    # Snapshot which resources are currently running before we touch them.
    active = get_resource_locations()

    # Enter maintenance-mode so pacemaker doesn't react while resources are
    # torn down and recreated.
    AgentShell.try_run([
        "crm_attribute",
        "--type",
        "crm_config",
        "--name",
        "maintenance-mode",
        "--update",
        "true",
    ])

    wait_list = []
    for res in dom.findall(".//primitive"):
        # Only convert chroma Target primitives.
        if not (res.get("provider") == "chroma"
                and res.get("type") == "Target"):
            continue

        ha_label = res.get("id")

        # _get_target_config() will raise KeyError if uuid doesn't exist locally
        # next() will raise StopIteration if it doesn't find attribute target
        try:
            info = next(
                _get_target_config(ops.get("value"))
                for ops in res.findall('.//nvpair[@name="target"]'))
        except Exception as err:
            # NOTE(review): 'err' is captured but not logged here.
            console_log.error("No local info for resource: %s", ha_label)
            continue

        # Tear down the old resource and constraints, then recreate in the
        # new layout, re-enabling only targets that were previously active.
        _unconfigure_target_priority(False, ha_label)
        _unconfigure_target_priority(True, ha_label)
        _unconfigure_target_ha(ha_label, True)
        _configure_target_ha(ha_label, info,
                             (active.get(ha_label) is not None))
        _configure_target_priority(True, ha_label, locations[ha_label][0])
        _configure_target_priority(False, ha_label, locations[ha_label][1])
        wait_list.append([ha_label, (active.get(ha_label) is not None)])

    # wait for last item
    for wait in wait_list:
        console_log.info("Waiting on %s", wait[0])
        _wait_target(*wait)
    # Leave maintenance-mode now that the conversion is complete.
    AgentShell.try_run([
        "crm_attribute",
        "--type",
        "crm_config",
        "--name",
        "maintenance-mode",
        "--delete",
    ])