def set_etcd_config(flag, value):
    """Set one flag in the JSON config document stored at ``/sf/config``.

    The current document is read from the etcd on localhost (an empty dict
    is used when nothing is stored), ``flag`` is assigned ``value`` after a
    best-effort conversion, and the whole document is written back.

    Conversion rules: 't'/'true'/'True' become ``True``, 'f'/'false'/'False'
    become ``False``, values containing a '.' are tried as ``float``, anything
    else is tried as ``int``. Values that fail numeric parsing are stored
    unchanged.
    """
    etcd = Etcd3Client(host='localhost', port=2379, protocol='http',
                       api_path='/v3beta/')

    stored = etcd.get('/sf/config', metadata=True)
    config = json.loads(stored[0][0]) if stored else {}

    # Convert values if possible
    if value in ('t', 'true', 'True'):
        value = True
    elif value in ('f', 'false', 'False'):
        value = False
    else:
        try:
            value = float(value) if value.find('.') != -1 else int(value)
        except ValueError:
            # Not numeric: keep the value exactly as it was passed in.
            pass

    click.echo('Setting %s to %s(%s)' % (flag, type(value), value))
    config[flag] = value
    etcd.put('/sf/config', json.dumps(config, indent=4, sort_keys=True))
def dequeue(queuename):
    """Move the first workitem of a queue to 'processing' and return it.

    Returns a ``(jobname, workitem)`` tuple, or ``(None, None)`` when no
    entry exists under the queue's key prefix.
    """
    prefix = _construct_key('queue', queuename, None)
    etcd = Etcd3Client()

    # We only hold the lock if there is anything in the queue
    if not etcd.get_prefix(prefix):
        return None, None

    with get_lock('queue', None, queuename, op='Dequeue'):
        entries = etcd.get_prefix(
            prefix, sort_order='ascend', sort_target='key')

        # Only the first entry (lowest key) is handled; we return from
        # inside the loop body.
        for data, metadata in entries:
            key = metadata['key']
            jobname = str(key).split('/')[-1].rstrip("'")
            workitem = json.loads(data, object_hook=decodeTasks)

            put('processing', queuename, jobname, workitem)
            etcd.delete(key)

            fields = {
                'jobname': jobname,
                'queuename': queuename,
                'workitem': workitem,
            }
            LOG.withFields(fields).info(
                'Moved workitem from queue to processing')
            return jobname, workitem

    return None, None
def _on_check_etcd(self, event):
    """Emit slurmctld_started once this host appears in etcd's node list.

    Reads the JSON list stored under the ``all_nodes`` key on the etcd at
    the slurmctld address. If ``self.hostname`` is in that list the
    ``slurmctld_started`` event is emitted; in every other case (query
    failure, missing key, host not listed) the event is deferred.
    """
    host = self._slurmd.slurmctld_address
    port = self._slurmd.etcd_port

    logger.debug(f"## Connecting to etcd3 in {host}:{port}")
    client = Etcd3Client(host=host, port=port, api_path="/v3/")

    logger.debug("## Querying etcd3 for node list")
    try:
        v = client.get(key="all_nodes")
        logger.debug(f"## Got: {v}")
    except Exception as e:
        logger.error(
            f"## Unable to connect to {host} to get list of nodes: {e}")
        event.defer()
        return

    if v:
        hostnames = json.loads(v[0])
        logger.debug(f"### etcd3 node list: {hostnames}")
        if self.hostname in hostnames:
            self.on.slurmctld_started.emit()
            return

    logger.debug("## Node not accounted for. Deferring.")
    event.defer()
def get_holder(self):
    """Return the ``(node, pid)`` pair recorded under this lock's key.

    The value stored at ``self.key`` is expected to be a JSON object with
    ``node`` and ``pid`` entries. ``(None, None)`` is returned when the key
    does not exist or holds an empty value.
    """
    value = Etcd3Client().get(self.key, metadata=True)

    # A missing key and an empty value both mean "no holder". The missing
    # key case previously returned the NotImplementedError class in the pid
    # slot, which leaked a non-pid object to callers.
    if not value or not value[0][0]:
        return None, None

    holder = json.loads(value[0][0])
    return holder['node'], holder['pid']
def show_etcd_config():
    """Print the JSON config stored at ``/sf/config`` on the local etcd.

    Prints ``{}`` when nothing is stored; otherwise the document is
    re-serialised with a 4-space indent and sorted keys.
    """
    etcd = Etcd3Client(host='localhost', port=2379, protocol='http',
                       api_path='/v3beta/')
    stored = etcd.get('/sf/config', metadata=True)

    if not stored:
        click.echo('{}')
        return

    config = json.loads(stored[0][0])
    click.echo(json.dumps(config, indent=4, sort_keys=True))
def get_lock(objecttype, subtype, name, ttl=60, timeout=10, log_ctx=LOG,
             op=None):
    """Build an ActualLock for the named object, backed by a new Etcd3Client.

    The lock is returned without being acquired; call acquire() on it, or
    use it as a context manager so it is acquired on entry and released on
    exit.

    NOTE(review): this docstring used to state that acquiring has no
    timeout, yet ``timeout`` is forwarded to ActualLock -- confirm which is
    accurate.
    """
    etcd = Etcd3Client()
    return ActualLock(
        objecttype,
        subtype,
        name,
        ttl=ttl,
        client=etcd,
        log_ctx=log_ctx,
        timeout=timeout,
        op=op,
    )
def _restart_queue(queuename):
    """Put every workitem found under 'processing' back onto the queue.

    Runs while holding the queue lock. Each entry is re-queued under its
    original job name and then removed from 'processing'.
    """
    prefix = _construct_key('processing', queuename, None)

    with get_lock('queue', None, queuename):
        entries = Etcd3Client().get_prefix(prefix, sort_order='ascend')
        for data, metadata in entries:
            # NOTE(review): metadata is read by attribute here; confirm the
            # client in use returns an object with ``.key`` rather than a
            # dict.
            raw_key = str(metadata.key)
            jobname = raw_key.split('/')[-1].rstrip("'")
            workitem = json.loads(data)

            put('queue', queuename, jobname, workitem)
            delete('processing', queuename, jobname)
            logutil.warning(
                None, 'Reset %s workitem %s' % (queuename, jobname))
def _restart_queue(queuename):
    """Put every workitem found under 'processing' back onto the queue.

    Runs while holding the queue lock (op 'Restart'). Each entry is
    re-queued under its original job name, removed from 'processing', and a
    warning is logged.
    """
    prefix = _construct_key('processing', queuename, None)

    with get_lock('queue', None, queuename, op='Restart'):
        entries = Etcd3Client().get_prefix(prefix, sort_order='ascend')
        for data, metadata in entries:
            raw_key = str(metadata['key'])
            jobname = raw_key.split('/')[-1].rstrip("'")
            workitem = json.loads(data)

            put('queue', queuename, jobname, workitem)
            delete('processing', queuename, jobname)

            fields = {'jobname': jobname, 'queuename': queuename}
            LOG.withFields(fields).warning('Reset workitem')
def etcd_settings(_settings):
    """Load the config dict stored at ``/sf/config`` on the local etcd.

    Returns an empty dict when nothing is stored or when the connection to
    etcd fails. The ``_settings`` argument is not used.
    """
    try:
        etcd = Etcd3Client(host='localhost', port=2379, protocol='http',
                           api_path='/v3beta/')
        stored = etcd.get('/sf/config', metadata=True)
        return json.loads(stored[0][0]) if stored else {}
    except ConnectionFailedError:
        # NOTE(mikal): I'm not sure this is the right approach, as it might cause
        # us to silently ignore config errors. However, I can't just mock this away
        # in tests because this code runs before the mocking occurs.
        return {}
def main():
    """Exercise the client's status, lease, key and lock calls, printing each result."""
    client = Etcd3Client()

    print('>>>> Status')
    status = client.status()
    print("cluster id : %r" % status['header']['cluster_id'])
    members = client.members()
    print("first member info : %r" % members[0])

    print('>>>> Lease')
    lease = client.lease()
    print("Lease id : %r" % lease.id)
    print("Lease ttl : %r" % lease.ttl())
    print("Lease refresh : %r" % lease.refresh())

    # Two keys are attached to the lease before it is revoked.
    print("Key put foo2 : %r" % client.put('foo2', 'bar2', lease))
    print("Key put foo3 : %r" % client.put('foo3', 'bar3', lease))
    print("Lease Keys : %r" % lease.keys())
    print("Lease Revoke : %r" % lease.revoke())

    # Plain key operations, including a get and a delete of unset keys.
    print("Key get foox : %r" % client.get('foox'))
    print("Key put foo : %r" % client.put('foo', 'bar'))
    print("Key get foo : %r" % client.get('foo'))
    print("Key delete foo : %r" % client.delete('foo'))
    print("Key delete foo-unknown : %r" % client.delete('foo-unknown'))

    print('>>>> Lock')
    lock = Lock('xyz-%s' % clock(), ttl=10000, client=client)
    print("acquire : %r" % lock.acquire())
    print("refresh : %r" % lock.refresh())
    print("is_acquired : %r" % lock.is_acquired())
    print("release : %r" % lock.release())
    print("is_acquired : %r" % lock.is_acquired())
def test_client_exceptions_by_code(self):
    """A 500 reply from the session makes status() raise InternalServerError."""
    body = '{ "error": "etcdserver: unable to reach quorum" }'
    client = Etcd3Client(host="127.0.0.1")

    with mock.patch.object(client, "session") as mock_session:
        reply = mock.Mock()
        reply.status_code = 500
        reply.reason = "Internal Server Error"
        reply.text = body
        mock_session.post.return_value = reply

        # The test fails if status() returns without raising.
        with self.assertRaises(InternalServerError) as raised:
            client.status()

        self.assertEqual(str(raised.exception), "Internal Server Error")
        self.assertEqual(raised.exception.detail_text, body)
def clear_stale_locks():
    """Delete locks recorded for this node whose holder pid no longer exists."""
    # Remove all locks held by former processes on this node. This is required
    # after an unclean restart, otherwise we need to wait for these locks to
    # timeout and that can take a long time.
    etcd = Etcd3Client()
    prefix = LOCK_PREFIX + '/'

    entries = etcd.get_prefix(prefix, sort_order='ascend', sort_target='key')
    for data, metadata in entries:
        key = metadata['key']
        holder = json.loads(data)
        node = holder['node']
        pid = int(holder['pid'])

        # Skip locks owned by other nodes, and locks whose process is alive.
        if node != config.NODE_NAME or psutil.pid_exists(pid):
            continue

        etcd.delete(key)
        lockname = str(key).replace(prefix, '')
        fields = {'lock': lockname, 'old-pid': pid, 'old-node': node}
        LOG.withFields(fields).warning('Removed stale lock')
def dequeue(queuename):
    """Move the first workitem of a queue to 'processing' and return it.

    Runs while holding the queue lock. Returns a ``(jobname, workitem)``
    tuple, or ``(None, None)`` when no entry exists under the queue's key
    prefix.
    """
    prefix = _construct_key('queue', queuename, None)
    etcd = Etcd3Client()

    with get_lock('queue', None, queuename):
        entries = etcd.get_prefix(
            prefix, sort_order='ascend', sort_target='key')

        # Only the first entry (lowest key) is handled; we return from
        # inside the loop body.
        for data, metadata in entries:
            key = metadata['key']
            jobname = str(key).split('/')[-1].rstrip("'")
            workitem = json.loads(data)

            put('processing', queuename, jobname, workitem)
            etcd.delete(key)

            message = (
                'Moved workitem %s from queue to processing for %s with work %s'
                % (jobname, queuename, workitem))
            logutil.info(None, message)
            return jobname, workitem

    return None, None
def clear_stale_locks():
    """Delete locks recorded for this node whose holder pid no longer exists."""
    # Remove all locks held by former processes on this node. This is required
    # after an unclean restart, otherwise we need to wait for these locks to
    # timeout and that can take a long time.
    etcd = Etcd3Client()

    entries = etcd.get_prefix(
        '/sflocks/', sort_order='ascend', sort_target='key')
    for data, metadata in entries:
        key = metadata['key']
        holder = json.loads(data)
        node = holder['node']
        pid = int(holder['pid'])

        # Skip locks owned by other nodes, and locks whose process is alive.
        if node != config.parsed.get('NODE_NAME') or psutil.pid_exists(pid):
            continue

        etcd.delete(key)
        lockname = str(key).replace('/sflocks/', '')
        message = (
            'Removed stale lock for %s, previously held by pid %s on %s'
            % (lockname, pid, node))
        logutil.warning(None, message)
def test_client_bad_request(self):
    """A 400 reply from the session makes status() raise an Etcd3Exception."""
    body = '{ "error": "etcdserver: mvcc: required revision has been compacted", "code": 11 }'
    client = Etcd3Client(host="127.0.0.1")

    with mock.patch.object(client, "session") as mock_session:
        reply = mock.Mock()
        reply.status_code = 400
        reply.reason = "Bad Request"
        reply.text = body
        mock_session.post.return_value = reply

        # The test fails if status() returns without raising.
        with self.assertRaises(Etcd3Exception) as raised:
            client.status()

        self.assertEqual(str(raised.exception), "Bad Request")
        self.assertEqual(raised.exception.detail_text, body)
def delete_all(objecttype, subtype, sort_order=None):
    """Delete every key under the prefix built from objecttype and subtype.

    ``sort_order`` is accepted but not used.
    """
    prefix = _construct_key(objecttype, subtype, None)
    etcd = Etcd3Client()
    etcd.delete_prefix(prefix)
def get_existing_locks():
    """Return a dict mapping each key under LOCK_PREFIX to its decoded JSON value.

    Keys are decoded from bytes as UTF-8.
    """
    entries = Etcd3Client().get_prefix(LOCK_PREFIX + '/')
    return {
        metadata['key'].decode('utf-8'): json.loads(data)
        for data, metadata in entries
    }
def test_client_ipv6(self):
    """An IPv6 host is wrapped in square brackets in the generated URL."""
    expected = "http://[::1]:2379/v3alpha/lease/grant"
    url = Etcd3Client(host="::1").get_url("/lease/grant")
    self.assertEqual(expected, url)
def test_client_default(self):
    """A client built with no arguments targets localhost:2379 under /v3alpha."""
    expected = "http://localhost:2379/v3alpha/lease/grant"
    url = Etcd3Client().get_url("/lease/grant")
    self.assertEqual(expected, url)
def test_client_ipv4(self):
    """An IPv4 host is used verbatim in the generated URL."""
    expected = "http://127.0.0.1:2379/v3alpha/lease/grant"
    url = Etcd3Client(host="127.0.0.1").get_url("/lease/grant")
    self.assertEqual(expected, url)
def create(objecttype, subtype, name, data, ttle=None):
    """JSON-encode ``data`` and pass it to the client's create() for the object's key.

    Returns whatever the client's create() returns. ``ttle`` is accepted
    but not used; no lease is attached.
    """
    key = _construct_key(objecttype, subtype, name)
    payload = json.dumps(
        data, indent=4, sort_keys=True, cls=JSONEncoderTasks)
    etcd = Etcd3Client()
    return etcd.create(key, payload, lease=None)
def setUpClass(cls):
    """Create one default Etcd3Client and store it on the class as ``client``."""
    shared_client = Etcd3Client()
    cls.client = shared_client
def main():
    """Watch the key 'foo' and print every event the watch yields."""
    client = Etcd3Client()

    # watch() returns a second value (presumably a cancel handle -- confirm
    # against the client docs); this demo never uses it.
    events, _cancel = client.watch('foo')

    for event in events:
        # print() does not interpolate logging-style arguments, so format
        # explicitly. The one-element tuple keeps tuple or dict events from
        # being treated as the format arguments themselves.
        print(">>>> event : %r" % (event,))
def get(objecttype, subtype, name):
    """Fetch the object's key and return its JSON-decoded value.

    Returns ``None`` when the client returns nothing for the key.
    """
    key = _construct_key(objecttype, subtype, name)
    stored = Etcd3Client().get(key, metadata=True)
    if not stored:
        return None
    return json.loads(stored[0][0])
def set_list_of_accounted_nodes(self, nodes: List[str]) -> None:
    """Store ``nodes`` as a JSON list under the ``all_nodes`` key in etcd."""
    logger.debug(f"## setting on etcd: all_nodes/{nodes}")
    etcd = Etcd3Client(api_path="/v3/")
    payload = json.dumps(nodes)
    etcd.put(key="all_nodes", value=payload)
def get_all(objecttype, subtype, sort_order=None):
    """Yield the JSON-decoded value of every key under the object prefix.

    ``sort_order`` is forwarded to the client's get_prefix().
    """
    prefix = _construct_key(objecttype, subtype, None)
    entries = Etcd3Client().get_prefix(prefix, sort_order=sort_order)
    for entry in entries:
        raw = entry[0]
        yield json.loads(raw)
def get_all_dict(objecttype, subtype=None, sort_order=None):
    """Return a dict of every key under the object prefix to its decoded JSON value.

    Keys are decoded from bytes as UTF-8. ``sort_order`` is forwarded to
    the client's get_prefix().
    """
    prefix = _construct_key(objecttype, subtype, None)
    entries = Etcd3Client().get_prefix(prefix, sort_order=sort_order)
    return {
        metadata['key'].decode('utf-8'): json.loads(data)
        for data, metadata in entries
    }
def delete(objecttype, subtype, name):
    """Delete the single key built from objecttype, subtype and name."""
    key = _construct_key(objecttype, subtype, name)
    etcd = Etcd3Client()
    etcd.delete(key)
def put(objecttype, subtype, name, data, ttl=None):
    """JSON-encode ``data`` and write it to the object's key.

    ``ttl`` is accepted but not used; no lease is attached.
    """
    key = _construct_key(objecttype, subtype, name)
    payload = json.dumps(data, indent=4, sort_keys=True)
    etcd = Etcd3Client()
    etcd.put(key, payload, lease=None)