예제 #1
0
    def __init__(self, name, value, node_types=None, node_list=None):
        """Initialize the instance from a name/value pair and optional filters.

        node_types: stored as given when truthy, otherwise a new empty set.
        node_list: when not None, converted with NodeSet.fromlist();
            otherwise the node list is an empty NodeSet.
        """
        self.name, self.value = name, value
        self._node_types = set()

        self.node_types = node_types if node_types else set()

        # Start from an empty set, then replace it if nodes were supplied.
        self.node_list = NodeSet()
        if node_list is not None:
            self.node_list = NodeSet.fromlist(node_list)
예제 #2
0
    def testConfigurationParser(self):
        """test configuration parsing"""
        # NamedTemporaryFile is opened in binary mode by default, so the
        # configuration is written as bytes (valid on Python 2 and 3). The
        # context manager guarantees the file is closed and removed.
        with tempfile.NamedTemporaryFile() as tmpfile:
            tmpfile.write(b'# this is a comment\n')
            tmpfile.write(b'[Main]\n')
            tmpfile.write(b'admin: nodes[0-1]\n')
            tmpfile.write(b'nodes[0-1]: nodes[2-5]\n')
            tmpfile.write(b'nodes[4-5]: nodes[6-9]\n')
            # Make the content visible to the parser, which reopens by name.
            tmpfile.flush()
            parser = TopologyParser()
            parser.load(tmpfile.name)

            # NOTE(review): result unused; presumably checks that tree() can
            # be called more than once -- confirm before removing.
            parser.tree('admin')
            ns_all = NodeSet('admin,nodes[0-9]')
            ns_tree = NodeSet()
            # The union of all node groups in the tree must cover every node.
            for nodegroup in parser.tree('admin'):
                ns_tree.add(nodegroup.nodeset)
            self.assertEqual(str(ns_all), str(ns_tree))
예제 #3
0
 def test_nb_errors_remote2(self):
     """nb_errors() is zero after a successful command (remote target)."""
     ok_action = Action(name='test', target=HOSTNAME, command='/bin/true')
     svc = Service('test_service')
     svc.add_action(ok_action)
     svc.run('test')

     # No node in error, a zero error count and a DONE status are expected.
     self.assertEqual(ok_action.nodes_error(), NodeSet())
     self.assertEqual(ok_action.nb_errors(), 0)
     self.assertEqual(ok_action.status, DONE)
예제 #4
0
 def test_internal_mismatch(self):
     """str() fails when a pattern's stored vector length is not two."""
     pattern = "cluster%sc%s"
     nset = NodeSet("cluster[1-30]c[1-2]")
     self.assertTrue(pattern in nset._patterns)

     # One-element vector for a two-placeholder pattern.
     nset._patterns[pattern] = RangeSetND([[1]])
     self.assertRaises(NodeSetParseError, str, nset)

     # Two-element vector: string conversion works.
     nset._patterns[pattern] = RangeSetND([[1, 1]])
     self.assertEqual(str(nset), "cluster1c1")

     # Three-element vector for a two-placeholder pattern.
     nset._patterns[pattern] = RangeSetND([[1, 1, 1]])
     self.assertRaises(NodeSetParseError, str, nset)
예제 #5
0
 def get_devices(devices):
     """Expand a device expression into a comma-separated string of names.

     A falsy ``devices`` value yields an empty string. Otherwise the value
     is parsed as a NodeSet (group resolution disabled) and its members are
     joined with commas.

     Raises RuntimeError when the expression cannot be parsed.
     """
     if not devices:
         return ''
     try:
         expanded = list(NodeSet(devices, resolver=RESOLVER_NOGROUP))
     except (NodeSetParseError, RangeSetParseError):
         raise RuntimeError("Not a valid device name")
     return ','.join(expanded)
예제 #6
0
 def byhosts(self):
     """Describe, as a single string, which virtual nodes run on each host.

     The result is a " - "-separated list of "host:nodeset" items, one per
     distinct host found in self.nodes.
     """
     grouped = {}
     for vnode in self.nodes.values():
         if vnode.host in grouped:
             grouped[vnode.host].add(vnode.name)
         else:
             grouped[vnode.host] = NodeSet(vnode.name)

     parts = []
     for host, names in grouped.items():
         parts.append(host + ':' + str(names))
     return " - ".join(parts)
예제 #7
0
    def __init__(self, root, topology, fanout=0):
        """Store the constructor arguments and generate the table.

        table_generate(root, topology) is called once the basic attributes
        are set; the set of unreachable hosts is created afterwards and
        starts out empty.
        """
        self.root, self.topology, self.fanout = root, topology, fanout
        self.nodes_fanin = {}
        self.table = None

        self.table_generate(root, topology)
        self._unreachable_hosts = NodeSet()
예제 #8
0
    def _handle_shine_proxy_error(self, nodes, message):
        """
        Record an error message so that it can be processed later.

        The string form of `nodes` is replaced by 'THIS_SHINE_HOST' in the
        message, so that messages differing only by the host name can be
        grouped together.
        """
        hostname = str(nodes)
        generic_message = message.replace(hostname, 'THIS_SHINE_HOST')
        self.proxy_errors.add(NodeSet(nodes), generic_message)
예제 #9
0
 def recv_ctl(self, msg):
     """Process a message received while in the 'control' state.

     - 'ACK': dequeue and send the next pending message.
     - routed messages: relayed to the worker registered under msg.srcid
       (output lines, return codes and timeouts, once per node).
     - error messages: raised as TopologyError.
     - anything else is only logged.
     """
     self.logger.debug("recv_ctl")

     if msg.type == 'ACK':
         # The ack id is not checked against any history here.
         self.logger.debug("got ack (%s)", msg.type)
         self.send_dequeue()
         return

     if not isinstance(msg, RoutedMessageBase):
         if msg.type == ErrorMessage.ident:
             # tree runtime error, could generate a new event later
             raise TopologyError(msg.reason)
         self.logger.debug("recv_ctl: unhandled msg %s",  msg)
         return

     metaworker = self.workers[msg.srcid]
     mtype = msg.type

     # Standard output and standard error only differ by stream name and
     # log label, so they share one relay path below.
     if mtype == StdOutMessage.ident:
         stream, logfmt = 'stdout', "StdOutMessage: \"%s\""
     elif mtype == StdErrMessage.ident:
         stream, logfmt = 'stderr', "StdErrMessage: \"%s\""
     else:
         stream, logfmt = None, None

     if stream is not None:
         # Output is decoded and relayed only when the worker has a handler.
         if metaworker.eh:
             targets = NodeSet(msg.nodes)
             text = msg.data_decode()
             self.logger.debug(logfmt, text)
             for line in text.splitlines():
                 for node in targets:
                     metaworker._on_remote_node_msgline(node, line, stream,
                                                        self.gateway)
     elif mtype == RetcodeMessage.ident:
         retcode = msg.retcode
         for node in NodeSet(msg.nodes):
             metaworker._on_remote_node_rc(node, retcode, self.gateway)
     elif mtype == TimeoutMessage.ident:
         self.logger.debug("TimeoutMessage for %s", msg.nodes)
         for node in NodeSet(msg.nodes):
             metaworker._on_remote_node_timeout(node, self.gateway)
     """
예제 #10
0
 def recv_ctl(self, msg):
     """Process a message received while in the 'control' state.

     - 'ACK': if it acknowledges the latest entry of the write history,
       that entry is popped and _on_written() is called for each of its
       nodes; the next pending message is then dequeued in all cases.
     - routed messages: relayed to the worker registered under msg.srcid
       (output lines, return codes and timeouts, once per node).
     - error messages: raised as TopologyError, prefixed with the gateway.
     - anything else is only logged.
     """
     if msg.type == 'ACK':
         self.logger.debug("got ack (%s)", msg.type)
         # check if ack matches write history msgid to generate ev_written
         history = self._cfg_write_hist
         if history and msg.ack == history[-1][0]:
             _, nodes, bytes_count, metaworker = history.pop()
             for node in nodes:
                 # we are losing track of the gateway here, we could override
                 # on_written in TreeWorker if needed (eg. for stats)
                 metaworker._on_written(node, bytes_count, 'stdin')
         self.send_dequeue()
         return

     if not isinstance(msg, RoutedMessageBase):
         if msg.type == ErrorMessage.ident:
             # tree runtime error, could generate a new event later
             raise TopologyError("%s: %s" % (self.gateway, msg.reason))
         self.logger.debug("recv_ctl: unhandled msg %s", msg)
         return

     metaworker = self.workers[msg.srcid]
     mtype = msg.type

     # Standard output and standard error share one relay path; only the
     # stream name passed to the worker differs.
     if mtype == StdOutMessage.ident:
         stream = 'stdout'
     elif mtype == StdErrMessage.ident:
         stream = 'stderr'
     else:
         stream = None

     if stream is not None:
         targets = NodeSet(msg.nodes)
         # msg.data_decode()'s name is a bit confusing, but returns
         # pickle-decoded bytes (encoded string) and not string...
         payload = msg.data_decode() + b'\n'
         for line in payload.splitlines():
             for node in targets:
                 metaworker._on_remote_node_msgline(
                     node, line, stream, self.gateway)
     elif mtype == RetcodeMessage.ident:
         retcode = msg.retcode
         for node in NodeSet(msg.nodes):
             metaworker._on_remote_node_close(node, retcode, self.gateway)
     elif mtype == TimeoutMessage.ident:
         self.logger.debug("TimeoutMessage for %s", msg.nodes)
         for node in NodeSet(msg.nodes):
             metaworker._on_remote_node_timeout(node, self.gateway)
     """
예제 #11
0
    def fromdict(self, grpdict):
        """Populate group attributes from dict.

        After delegating to BaseEntity.fromdict(), an optional 'services'
        entry of `grpdict` is used to build the subservices of this group:
        each key is expanded with NodeSet (so one key may name several
        subservices) and each value is the property dict shared by them.
        Subservices are then linked to each other according to their
        declared dependencies, and to this group's source and sink.

        Raises UnknownDependencyError when a dependency names something
        that is not a subservice of this group.
        """
        BaseEntity.fromdict(self, grpdict)

        if 'services' in grpdict:
            # Maps each subservice name to the DepWrapper holding its
            # declared dependencies and the service object built for it.
            dep_mapping = {}

            # Wrap dependencies from YAML and build the service
            for names, props in grpdict['services'].items():
                for subservice in NodeSet(names):

                    # Parsing dependencies
                    wrap = DepWrapper()
                    for prop in ('require', 'require_weak',
                                 'before', 'after', 'check'):
                        if prop in props:
                            # 'before' and 'after' are stored under the
                            # 'require_weak' key. The value is also written
                            # back into props, so props is modified in place
                            # before service.fromdict(props) is called below.
                            if prop in ('before', 'after'):
                                props['require_weak'] = props[prop]
                                prop = 'require_weak'
                            wrap.deps[prop] = props[prop]

                    # Get subservices which might be Service or ServiceGroup
                    # (a nested 'services' key means a nested group)
                    service = None
                    if 'services' in props:
                        service = ServiceGroup(subservice)
                        service.fromdict(props)
                    else:
                        service = Service(subservice)
                        service.fromdict(props)

                    # Link the group and its new subservice together
                    self._subservices[subservice] = service
                    service.parent = self

                    wrap.source = service
                    dep_mapping[subservice] = wrap

            # Generate dependency links of the service. This runs once all
            # subservices exist, so a dependency may name a subservice
            # declared later in the dict.
            for wrap in dep_mapping.values():
                # Attach every declared dependency; the upper-cased
                # dependency type is passed as the 'sgth' argument.
                # NOTE(review): assumes each wrap.deps value is a list of
                # names -- a plain string would be iterated per character.
                for dtype in wrap.deps:
                    for dep in wrap.deps[dtype]:
                        if dep not in self._subservices:
                            raise UnknownDependencyError(dep)
                        wrap.source.add_dep(self._subservices[dep],
                                                         sgth=dtype.upper())

            # Bind subgraph to the service group: subservices without
            # children are linked to self._source, and subservices without
            # parents are linked to self._sink.
            for service in self.iter_subservices():
                if not service.children:
                    service.add_dep(self._source, parent=False)
                if not service.parents:
                    service.add_dep(self._sink)

        # Done whether or not grpdict declared any 'services'.
        for subser in self.iter_subservices():
            subser.inherits_from(self)
예제 #12
0
 def test_failed_nodes(self):
     """Nodes on which an action failed are recorded and propagated."""
     failing = Action('start', command='/bin/false', target=HOSTNAME)
     svc = Service('test')
     svc.add_actions(failing)
     failing.run()

     self.assertEqual(failing.failed_nodes, NodeSet(HOSTNAME))
     self.assertEqual(failing.status, ERROR)
     # The owning service must report the same failed nodes.
     self.assertEqual(svc.failed_nodes, failing.failed_nodes)
예제 #13
0
def nodeset(nodes_list):
    '''Build a ClusterShell NodeSet from a list of node names.

    The names are joined with commas and parsed as one NodeSet expression.
    Any failure is reported as an AnsibleError carrying the original
    exception text.
    '''
    try:
        return NodeSet(",".join(nodes_list))
    except Exception as err:
        raise AnsibleError('Error joining nodeset, original exception: %s' %
                           to_native(err))
예제 #14
0
 def setup_method(self, _):
     """Create the query objects and reference host sets used by each test."""
     # pylint: disable=attribute-defined-outside-init
     fixture_files = [
         get_fixture_path(os.path.join('backends', 'knownhosts.txt')),
         get_fixture_path(os.path.join('backends', 'knownhosts_man.txt')),
     ]
     self.query = KnownHostsQuery({'knownhosts': {'files': fixture_files}})
     # Query built from an empty configuration.
     self.no_query = KnownHostsQuery({})

     self.no_hosts = NodeSet(resolver=RESOLVER_NOGROUP)
     self.domain_hosts = NodeSet(
         'host[1,4-5,7-8,13-14].domain', resolver=RESOLVER_NOGROUP)
     other_hosts = NodeSet(
         'closenet,cvs.example.net', resolver=RESOLVER_NOGROUP)
     self.all_hosts = self.domain_hosts | other_hosts
예제 #15
0
 def test_inheritance(self):
     '''Test inheritance of target and timeout through a group'''
     parent = Service('parent')
     parent.target = '127.0.0.1'
     parent.timeout = 15

     grp = ServiceGroup('group')
     # First child has its own target, second one an explicit None timeout.
     child_a = Service('subser1')
     child_a.target = HOSTNAME
     child_b = Service('subser2')
     child_b.timeout = None
     grp.add_inter_dep(target=child_a)
     grp.add_inter_dep(target=child_b)

     grp.inherits_from(parent)

     self.assertEqual(grp.target, NodeSet('127.0.0.1'))
     self.assertEqual(grp.timeout, 15)
     self.assertEqual(child_a.target, NodeSet(HOSTNAME))
     self.assertEqual(child_a.timeout, 15)
     self.assertEqual(child_b.target, NodeSet('127.0.0.1'))
     self.assertEqual(child_b.timeout, None)
예제 #16
0
    def testOnNodes(self):
        """test tuning with nodes"""
        m = self.makeTempTuningModel("""
alias panic_on_lbug=/proc/sys/lnet/panic_on_lbug
1 panic_on_lbug MDS;CLT;foo[1-5]""")
        m.parse()

        # We have one tuning for each
        tuning = TuningParameter("/proc/sys/lnet/panic_on_lbug", "1", \
                                 ["mds","clt"], NodeSet("foo[1-5]"))
        # Check node types
        for t in ["mds", "client"]:
            tunings = m.get_params_for_name(None, [t])
            self.assertEqual(len(tunings), 1)
            self.assertTrue(tunings[0] == tuning)
        # Check node name
        tunings = m.get_params_for_name(NodeSet("foo[1-2]"), [])
        self.assertEqual(len(tunings), 1)
        self.assertTrue(tunings[0] == tuning)
예제 #17
0
    def __init__(self, nodes, handler, timeout, **kwargs):
        """
        Initialize Pdsh worker instance.

        Recognized keyword arguments: command, source, dest, autoclose,
        stderr, preserve and reverse. A non-None `command` selects 'pdsh'
        mode; otherwise a truthy `source` selects 'pdcp' (copy) mode.

        Raises ValueError when neither command nor source is given, or when
        a reverse copy is requested and `dest` is not a directory.
        """
        DistantWorker.__init__(self, handler)

        self.nodes = NodeSet(nodes)
        self.closed_nodes = NodeSet()

        self.command = kwargs.get('command')
        self.source = kwargs.get('source')
        self.dest = kwargs.get('dest')

        autoclose = kwargs.get('autoclose', False)
        stderr = kwargs.get('stderr', False)
        EngineClient.__init__(self, self, stderr, timeout, autoclose)

        if self.command is None and not self.source:
            raise ValueError("missing command or source parameter in "
                             "WorkerPdsh constructor")

        if self.command is not None:
            # PDSH: copy-related attributes are reset
            self.source = self.dest = None
            self.mode = 'pdsh'
        else:
            # PDCP
            self.command = None
            self.mode = 'pdcp'
            # Preserve modification times and modes?
            self.preserve = kwargs.get('preserve', False)
            # Reverse copy (rpdcp)?
            self.reverse = kwargs.get('reverse', False)
            if not self.reverse:
                self.isdir = os.path.isdir(self.source)
            else:
                self.isdir = os.path.isdir(self.dest)
                if not self.isdir:
                    raise ValueError("reverse copy dest must be a directory")

        self.popen = None
        self._buf = ""
 def get_nnodes_from_string(self, nodes_id):
     """Return the number of nodes described by a node-set string.

     :param nodes_id: string representing a set of nodes
     :type nodes_id: str
     :returns: number of nodes in the set
     :rtype: int
     """
     return len(NodeSet(nodes_id))
예제 #19
0
    def testStrConversions(self):
        """test str() casts"""
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in Python 3.12.
        t = TopologyNodeGroup(NodeSet('admin0'))
        self.assertEqual(str(t), '<TopologyNodeGroup (admin0)>')

        t = TopologyRoutingTable()
        r0 = TopologyRoute(NodeSet('src[0-9]'), NodeSet('dst[5-8]'))
        r1 = TopologyRoute(NodeSet('src[10-19]'), NodeSet('dst[15-18]'))

        self.assertEqual(str(r0), 'src[0-9] -> dst[5-8]')

        # A routing table prints one route per line, in insertion order.
        t.add_route(r0)
        t.add_route(r1)
        self.assertEqual(str(t),
                         'src[0-9] -> dst[5-8]\nsrc[10-19] -> dst[15-18]')

        g = TopologyGraph()
        # XXX: Actually if g is not empty other things will be printed out...
        self.assertEqual(str(g), '<TopologyGraph>\n')
예제 #20
0
 def ev_hup(self, worker, node, rc):
     """Handle the end of command on one node.

     Without a timer, the return code is sent right away as a
     RetcodeMessage. With a timer, the node is only added to the set of
     nodes recorded for that return code.
     """
     if self.timer is None:
         self.gwchan.send(RetcodeMessage(node, rc, self.srcwkr))
         return

     # retcode grooming
     if rc not in self.retcodes:
         self.retcodes[rc] = NodeSet(node)
     else:
         self.retcodes[rc].add(node)
예제 #21
0
def update_config_cmdlist(args):
    """Create and run the command lines that update the slurm config file.

    The example configuration files are first copied on all nodes (the
    process exits with status 1 if that fails). The commands then set the
    control machine, cluster name and slurm user, comment out existing
    NodeName entries, and append one NodeName line per group of nodes with
    identical CPU layout plus one PartitionName line.

    Args:
        args (Namespace): Commandline arguments; control, nodes, sudo, user
            and partition are read.

    Returns:
        the value returned by execute_cluster_cmds() for the generated
        command list (presumably a failure count -- see the "> 0" check on
        the copy step)

    """
    all_nodes = NodeSet("{},{}".format(str(args.control), str(args.nodes)))
    sudo = "sudo" if args.sudo else ""

    # Copy the slurm*example.conf files to /etc/slurm/
    if execute_cluster_cmds(all_nodes, COPY_LIST, args.sudo) > 0:
        sys.exit(1)

    cmd_list = [
        "sed -i -e 's/ControlMachine=linux0/ControlMachine={}/g' {}".format(
            args.control, SLURM_CONF),
        "sed -i -e 's/ClusterName=linux/ClusterName=ci_cluster/g' {}".format(
            SLURM_CONF),
        "sed -i -e 's/SlurmUser=slurm/SlurmUser={}/g' {}".format(
            args.user, SLURM_CONF),
        "sed -i -e 's/NodeName/#NodeName/g' {}".format(SLURM_CONF),
    ]

    # This info needs to be gathered from every node that can run a slurm job
    command = r"lscpu | grep -E '(Socket|Core|Thread)\(s\)'"
    task = run_task(all_nodes, command)
    for output, nodes in task.iter_buffers():
        output_str = "\n".join([line.decode("utf-8") for line in output])
        # Map "Socket"/"Core"/"Thread" to the count reported by lscpu.
        info = {
            data[0]: data[1]
            for data in re.findall(r"(Socket|Core|Thread).*:\s+(\d+)",
                                   str(output_str)) if len(data) > 1
        }

        if "Socket" not in info or "Core" not in info or "Thread" not in info:
            # Did not find value for socket|core|thread so do not
            # include in config file. Skipping is required here: falling
            # through would raise KeyError on the lookups below.
            continue
        cmd_list.append("echo \"NodeName={0} Sockets={1} CoresPerSocket={2} "
                        "ThreadsPerCore={3}\" |{4} tee -a {5}".format(
                            NodeSet.fromlist(nodes), info["Socket"],
                            info["Core"], info["Thread"], sudo, SLURM_CONF))

    # Append the partition definition covering the requested nodes
    cmd_list.append("echo \"PartitionName= {} Nodes={} Default=YES "
                    "MaxTime=INFINITE State=UP\" |{} tee -a {}".format(
                        args.partition, args.nodes, sudo, SLURM_CONF))

    return execute_cluster_cmds(all_nodes, cmd_list, args.sudo)
예제 #22
0
    def testRemovingChild(self):
        """test child removal operation"""
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in Python 3.12.
        t0 = TopologyNodeGroup(NodeSet('node[0-9]'))
        t1 = TopologyNodeGroup(NodeSet('node[10-19]'))

        t0.add_child(t1)
        self.assertEqual(t0.children_ns(), t1.nodeset)
        t0.clear_child(t1)
        self.assertEqual(t0.children_ns(), None)

        # Removing an absent child is silent unless strict=True is given.
        t0.clear_child(t1)  # error discarded
        self.assertRaises(ValueError, t0.clear_child, t1, strict=True)

        t2 = TopologyNodeGroup(NodeSet('node[20-29]'))
        t0.add_child(t1)
        t0.add_child(t2)
        self.assertEqual(t0.children_ns(), t1.nodeset | t2.nodeset)
        t0.clear_children()
        self.assertEqual(t0.children_ns(), None)
        self.assertEqual(t0.children_len(), 0)
예제 #23
0
    def testConfigurationParserDeepTree(self):
        """test a configuration that generates a deep tree"""
        # NamedTemporaryFile is opened in binary mode by default, so the
        # configuration is written as bytes (valid on Python 2 and 3). The
        # context manager guarantees the file is closed and removed.
        with tempfile.NamedTemporaryFile() as tmpfile:
            tmpfile.write(b'# this is a comment\n')
            tmpfile.write(b'[routes]\n')
            tmpfile.write(b'admin: nodes[0-9]\n')

            levels = 15  # how deep do you want the tree to be?
            # Each level routes a block of 10 nodes to the next block of 10.
            for i in range(0, levels * 10, 10):
                line = 'nodes[%d-%d]: nodes[%d-%d]\n' % (
                    i, i + 9, i + 10, i + 19)
                tmpfile.write(line.encode('ascii'))
            # Make the content visible to the parser, which reopens by name.
            tmpfile.flush()
            parser = TopologyParser()
            parser.load(tmpfile.name)

            ns_all = NodeSet('admin,nodes[0-159]')
            ns_tree = NodeSet()
            # The union of all node groups in the tree must cover every node.
            for nodegroup in parser.tree('admin'):
                ns_tree.add(nodegroup.nodeset)
            self.assertEqual(str(ns_all), str(ns_tree))
예제 #24
0
 def get_template_for_vm(self, vm_name):
     """Return the first template whose 'vm_names' set contains vm_name.

     Each template's "template:<name>" section is read; sections with an
     empty 'vm_names' value are skipped. Returns None when nothing matches.
     """
     for template in self.get_template_list():
         vm_names = self.get("template:%s" % template, 'vm_names',
                             fallback='')
         if vm_names != '' and vm_name in NodeSet(vm_names):
             return template
     return None
예제 #25
0
    def stop(self, args):
        """Stop a service on a set of nodes.

        `args` is a sequence in which the last item is the service name and
        the items between the first and the last are the node names; the
        first item is not used. The command run for each node is printed,
        then "sudo service <name> stop" is run on all the nodes at once.
        """
        nodes = NodeSet()

        # Number of node names: everything but the first and last items.
        node_count = len(args) - 2
        command = 'sudo service ' + args[node_count + 1] + ' stop'

        for node in args[1:node_count + 1]:
            nodes.add(node)
            # Parenthesized single argument: valid on Python 2 and 3.
            print(node + ' : ' + command)

        task_self().run(command, nodes=nodes)
예제 #26
0
 def add_to_graph(self, color):
     """Color the nodes of this job in the graph if the job is RUNNING.

     Returns False without touching the graph when the job state is not
     "RUNNING"; otherwise restores the graph, colors each node of the job's
     NodeList, dumps the graph and returns True.
     """
     job = self._info
     if job['JobState'] != "RUNNING":
         return False

     graph = NetX(self._cfg)
     graph.restore()
     for node in NodeSet(job['NodeList']):
         graph.change_color(node, color, job["JobId"])
     graph.dump()
     return True
예제 #27
0
 def _testNS(self, pattern, expected_exc):
     """Check that parsing `pattern` fails with the expected exception.

     The test passes when a NodeSetParseError whose exact class is
     `expected_exc` is raised, and fails when no exception is raised. Any
     other exception propagates to the caller.
     """
     try:
         nodeset = NodeSet(pattern)
         # Printing forces the string conversion of the parsed set.
         # Parenthesized single argument: valid on Python 2 and 3.
         print(nodeset)
     except NodeSetParseError as e:
         self.assertEqual(e.__class__, expected_exc)
         return
     self.fail(
         "error not detected/no exception raised [pattern=%s]" % pattern)
예제 #28
0
 def test_resolve_property1(self):
     '''Symbols inside a property are replaced by their values'''
     entity = BaseEntity('test_service')
     entity.add_var('NODES', 'localhost,127.0.0.1')
     entity.desc = 'start %NAME on %TARGET'
     entity.target = '%NODES'

     expected_target = NodeSet('localhost,127.0.0.1')
     self.assertEqual(entity.resolve_property('target'), expected_target)
     self.assertEqual(entity.resolve_property('name'), 'test_service')
     self.assertEqual(entity.resolve_property('desc'),
                      'start test_service on 127.0.0.1,localhost')
예제 #29
0
 def __init__(self, node, command, worker, stderr, timeout,
              autoclose=False, rank=None):
     """Initialize through ExecClient and start with no closed node."""
     ExecClient.__init__(
         self, node, command, worker, stderr, timeout, autoclose, rank)
     self._closed_nodes = NodeSet()
예제 #30
0
    def testConfigurationParserBigTree(self):
        """test configuration parser against big propagation tree"""
        config_lines = (
            b'# this is a comment\n',
            b'[routes]\n',
            b'admin: ST[0-4]\n',
            b'ST[0-4]: STA[0-49]\n',
            b'STA[0-49]: nodes[0-10000]\n',
        )
        tmpfile = tempfile.NamedTemporaryFile()
        for raw_line in config_lines:
            tmpfile.write(raw_line)
        tmpfile.flush()

        topo_parser = TopologyParser()
        topo_parser.load(tmpfile.name)

        expected = NodeSet('admin,ST[0-4],STA[0-49],nodes[0-10000]')
        collected = NodeSet()
        tree = topo_parser.tree('admin')
        # admin + 5 ST + 50 STA inner nodes, and 10001 leaves
        self.assertEqual(tree.inner_node_count(), 56)
        self.assertEqual(tree.leaf_node_count(), 10001)
        for nodegroup in tree:
            collected.add(nodegroup.nodeset)
        self.assertEqual(str(expected), str(collected))