def test_server_to_entry(self):
    """Check the server dict <-> LDAP entry conversion in both directions."""
    # Application-side representation of a server record.
    app_form = {
        '_id': 'xxx',
        'cell': 'yyy',
        'partition': 'p',
        'traits': ['a', 'b', 'c'],
        'data': {'a': '1', 'b': '2'},
    }
    # Expected LDAP-side representation of the same record.
    entry_form = {
        'server': ['xxx'],
        'cell': ['yyy'],
        'partition': ['p'],
        'trait': ['a', 'b', 'c'],
        'data': ['{"a": "1", "b": "2"}'],
    }

    converted = admin.Server(None).to_entry(app_form)
    self.assertEqual(converted, entry_form)

    restored = admin.Server(None).from_entry(entry_form)
    self.assertEqual(restored, app_form)
def _list(cell, traits, partition):
    """Print the servers matching the given cell, traits and partition."""
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    query = {
        'cell': cell,
        'traits': cli.combine(traits),
        'partition': partition,
    }
    matches = server_admin.list(query)
    cli.out(formatter(matches))
def test_server_to_entry(self):
    """Tests conversion of app dictionary to ldap entry and back."""
    srv = {
        '_id': 'xxx',
        'cell': 'yyy',
        'traits': ['a', 'b', 'c'],
    }
    ldap_entry = {
        'server': ['xxx'],
        'cell': ['yyy'],
        'trait': ['a', 'b', 'c'],
    }

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertEqual(ldap_entry, admin.Server(None).to_entry(srv))
    self.assertEqual(srv, admin.Server(None).from_entry(ldap_entry))
def configure(cell, traits, server, partition, data):
    """Create or update a server record, then display it.

    Only the options that were supplied are written.  A partition of
    ``'-'`` or data equal to ``['-']`` stores ``None`` for that attribute.
    When no option is supplied the record is only looked up and printed.
    """
    server_admin = admin.Server(context.GLOBAL.ldap.conn)

    changes = {}
    if cell:
        changes['cell'] = cell
    if traits:
        changes['traits'] = cli.combine(traits)
    if partition:
        changes['partition'] = None if partition == '-' else partition
    if data:
        changes['data'] = None if data == ['-'] else data

    if changes:
        # Try to create first; fall back to update for an existing entry.
        try:
            server_admin.create(server, changes)
        except ldap3.LDAPEntryAlreadyExistsResult:
            server_admin.update(server, changes)

    try:
        cli.out(formatter(server_admin.get(server)))
    except ldap3.LDAPNoSuchObjectResult:
        click.echo('Server does not exist: %s' % server, err=True)
def scale_cmd(count, partition):
    """Scale nodes to specified count.

    Counts the servers of the current cell per partition, creates or
    deletes servers in ``partition`` so that it holds ``count`` of them,
    and prints the resulting per-partition counts.

    :param count: desired number of servers in the partition; ``None``
        (or the current count) leaves the partition untouched.
    :param partition: partition name; ``'-'`` and ``'_default'`` both
        select the default partition.
    """
    default_name = '_default'
    if partition in ('-', default_name):
        partition = None
    # Name under which the selected partition is counted and reported.
    target = partition if partition else default_name

    cell = context.GLOBAL.cell
    admin_srv = admin.Server(context.GLOBAL.ldap.conn)

    # Tally servers per partition in a single pass.  Servers without a
    # partition are counted under '_default': sorting/grouping on a None
    # key raises TypeError on Python 3, None cannot be rendered with the
    # '{: <32}' format below, and the lookup for the default partition
    # already uses the '_default' name.
    count_by_partition = collections.Counter(
        srv.get('partition') or default_name
        for srv in admin_srv.list({'cell': cell})
    )
    # Counter lookups return 0 for a missing key without inserting it.
    current_count = count_by_partition[target]

    if count not in {None, current_count}:
        if count > current_count:
            autoscale.create_n_servers(count - current_count, partition)
        else:
            autoscale.delete_n_servers(current_count - count, partition)
        count_by_partition[target] = count

    for part in sorted(count_by_partition):
        print('{: <32}: {}'.format(part, count_by_partition[part]))
def configure(cell, traits, server, partition, data):
    """Create or update a server record, then display it.

    Only the options that were supplied are written.  A partition of
    ``'-'`` stores ``None``.  ``data`` is a path to a JSON file whose
    parsed content becomes the ``data`` attribute.  When no option is
    supplied the record is only looked up and printed.
    """
    server_admin = admin.Server(context.GLOBAL.ldap.conn)

    changes = {}
    if cell:
        changes['cell'] = cell
    if traits:
        changes['traits'] = cli.combine(traits)
    if partition:
        changes['partition'] = None if partition == '-' else partition
    if data:
        with io.open(data, 'rb') as json_file:
            changes['data'] = json.load(json_file)

    if changes:
        # Try to create first; fall back to update for an existing entry.
        try:
            server_admin.create(server, changes)
        except ldap_exceptions.LDAPEntryAlreadyExistsResult:
            server_admin.update(server, changes)

    try:
        cli.out(formatter(server_admin.get(server)))
    except ldap_exceptions.LDAPNoSuchObjectResult:
        cli.bad_exit('Server does not exist: %s', server)
def sync_servers():
    """Publish the ids of all LDAP servers to the global servers znode."""
    _LOGGER.info('Sync servers.')

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    # An empty filter lists servers across all cells.
    everything = server_admin.list({})

    zkutils.ensure_exists(
        context.GLOBAL.zk.conn,
        z.path.globals('servers'),
        data=[entry['_id'] for entry in everything],
    )
def _list(cell, features):
    """Print the servers matching the given cell and features."""
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    query = {
        'cell': cell,
        'features': cli.combine(features),
    }
    matches = server_admin.list(query)
    cli.out(formatter(matches))
def test_create(self):
    """Smoke test for treadmill.api.server.create()."""
    hostname = 'foo.somewhere.in.xx.com'
    admin_double = admin.Server(None)

    self.svr.create(hostname, {'cell': 'ny-999-cell', 'partition': 'xxx'})

    # After creating, the API is expected to read the record back.
    admin_double.get.assert_called_with(hostname, dirty=True)
def _list(cell, traits, label):
    """Print the servers matching the given cell, traits and label."""
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    query = {
        'cell': cell,
        'traits': cli.combine(traits),
        'label': label,
    }
    matches = server_admin.list(query)
    cli.out(formatter(matches))
def _check(conn, **_kwargs):
    """Server state: """
    # Builds a ``servers`` table on ``conn`` with one row per server known
    # to LDAP (for the current cell) or to Zookeeper, recording where the
    # server is known and whether it is up / blacked out / present, then
    # returns ``_metadata()``.  Extra keyword arguments are ignored.
    admin_srv = admin.Server(context.GLOBAL.ldap.conn)
    # .get() so that a server record without a partition yields a NULL
    # partition instead of aborting the whole check with KeyError.
    servers_in_ldap = {
        server['_id']: server.get('partition')
        for server in admin_srv.list({'cell': context.GLOBAL.cell})
    }

    zkclient = context.GLOBAL.zk.conn
    presence = set(zkclient.get_children(z.SERVER_PRESENCE))
    in_zk = set(zkclient.get_children(z.SERVERS))
    blacked_out = set(zkclient.get_children(z.BLACKEDOUT_SERVERS))

    conn.execute(
        """
        CREATE TABLE servers (
            name text,
            partition text,
            in_ldap,
            in_zk,
            up integer,
            blackout integer,
            presence integer
        )
        """
    )

    all_servers = set(servers_in_ldap) | in_zk
    # Probe every server once; the value is the telnet check result.
    up = {server: checkout.telnet(server) for server in all_servers}

    rows = [
        (
            name,
            servers_in_ldap.get(name),
            name in servers_in_ldap,
            name in in_zk,
            # Store the probe result itself: ``name in up`` was always
            # True because ``up`` is keyed by every server.
            bool(up[name]),
            name in blacked_out,
            name in presence,
        )
        for name in all_servers
    ]

    conn.executemany(
        """
        INSERT INTO servers(
            name, partition, in_ldap, in_zk, up, blackout, presence
        ) values(?, ?, ?, ?, ?, ?, ?)
        """,
        rows
    )

    return _metadata()
def sync_server_topology():
    """Place the cell's LDAP servers into pod/rack buckets via masterapi.

    Every server listed in LDAP for the current cell is assigned to a
    pod bucket and a rack bucket derived from a hash of its name, then
    created in the scheduler.  Servers known to the scheduler but absent
    from LDAP are deleted unless they still have a presence node.
    """
    ldap_admin = admin.Server(context.GLOBAL.ldap.conn)
    servers = ldap_admin.list({'cell': context.GLOBAL.cell})
    zkclient = context.GLOBAL.zk.conn

    # Cells are composed of buckets. The topology is ~1000 servers per pod
    # with each pod composed of racks, each ~40 servers.
    hash_bits = 128  # width of an md5 digest
    pod_bits = 2     # leading bits that select the pod
    low_bits = hash_bits - pod_bits

    def _server_pod_rack(servername):
        # With no real topology information available, the md5 of the
        # server name picks a slot in a fictive layout of at most 4 pods
        # with 16 racks each.
        digest = hashlib.md5(servername.encode()).hexdigest()
        svr_id = int(digest, 16)
        # Top two bits select the pod.
        pod = svr_id >> low_bits
        # Remaining bits, modulo 16, select the rack.
        rack = (svr_id % (1 << low_bits)) % 16
        return (pod, rack)

    for entry in servers:
        servername = entry['_id']
        partition = entry.get('partition')

        pod, rack = _server_pod_rack(servername)
        pod_bucket = 'pod:{:04X}'.format(pod)
        rack_bucket = 'rack:{:04X}'.format(rack)

        _LOGGER.info('Update: %r(partition:%r) -> %r, %r',
                     servername, partition, pod_bucket, rack_bucket)

        masterapi.create_bucket(zkclient, pod_bucket, parent_id=None)
        masterapi.cell_insert_bucket(zkclient, pod_bucket)
        masterapi.create_bucket(zkclient, rack_bucket, parent_id=pod_bucket)
        masterapi.create_server(
            zkclient, servername, rack_bucket, partition=partition
        )

    ldap_servers = {entry['_id'] for entry in servers}
    zk_servers = set(masterapi.list_servers(zkclient))
    zk_server_presence = set(zkclient.get_children(z.SERVER_PRESENCE))

    for servername in zk_servers - ldap_servers:
        if servername in zk_server_presence:
            _LOGGER.warning('%s not in LDAP but node still present, skipping.',
                            servername)
            continue
        _LOGGER.info('Delete: %s', servername)
        masterapi.delete_server(zkclient, servername)
def delete_servers_by_name(servers):
    """Remove the named hosts, then drop their LDAP server records."""
    ipa_client = awscontext.GLOBAL.ipaclient
    ec2_conn = awscontext.GLOBAL.ec2

    _LOGGER.info('Deleting servers: %r', servers)

    hostmanager.delete_hosts(
        ipa_client=ipa_client,
        ec2_conn=ec2_conn,
        hostnames=servers,
    )

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    for name in servers:
        server_admin.delete(name)
def test_list(self):
    """Smoke test for treadmill.api.server._list()."""
    # No filters at all: the admin list call must still happen.
    self.svr.list(None, None)
    admin_double = admin.Server(None)
    self.assertTrue(admin_double.list.called)

    # Cell only.
    self.svr.list('some-cell', None)
    admin_double.list.assert_called_with({'cell': 'some-cell'})

    # Partition only: the admin query carries no filter.
    self.svr.list(partition='xxx')
    admin_double.list.assert_called_with({})

    # Cell and partition: only the cell reaches the admin query.
    self.svr.list('some-cell', 'xxx')
    admin_double.list.assert_called_with({'cell': 'some-cell'})
def _resolve_partition_threshold(cell, partition, value):
    """Turn a percentage threshold into a server count for a partition.

    ``value`` percent of the servers that LDAP lists for ``cell`` in
    ``partition`` is truncated to an integer; the result is never lower
    than ``_MINIMUM_THRESHOLD``.
    """
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell_servers = server_admin.list({'cell': cell})

    total = sum(
        1 for entry in cell_servers if entry['partition'] == partition
    )
    limit = int((value / 100.0) * total)

    _LOGGER.debug('Total/limit: %s/%s', total, limit)
    return max(limit, _MINIMUM_THRESHOLD)
def test():
    """Build the LDAP-sync and server-liveness test classes.

    Server lists are read from LDAP and Zookeeper when this function is
    called; one test per server is then attached to the returned classes
    through ``chk.T``.
    """
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell = context.GLOBAL.cell
    ldap_servers = [
        entry['_id'] for entry in server_admin.list({'cell': cell})
    ]

    zkclient = context.GLOBAL.zk.conn
    configured_servers = zkclient.get_children(z.SERVERS)
    presence_nodes = zkclient.get_children(z.SERVER_PRESENCE)
    up_servers = [presence.server_hostname(node) for node in presence_nodes]
    blackedout_servers = zkclient.get_children(z.BLACKEDOUT_SERVERS)
    rebooted_servers = zkclient.get_children(z.REBOOTS)

    class LdapSyncTest(unittest.TestCase):
        """Checks LDAP to Zookeeper server sync."""

    # One sync test per server known to LDAP.
    for server in ldap_servers:
        @chk.T(LdapSyncTest, server=server,
               configured_servers=configured_servers)
        def _test_server_configured(self, server, configured_servers):
            """Check if server is synced between LDAP and Zk: {server}."""
            self.assertIn(server, configured_servers)

    class ServerTest(unittest.TestCase):
        """Checks server(s) are up and alive."""

    # Blacked-out and rebooted servers are not expected to be up.
    expected_up = (
        set(configured_servers)
        - set(blackedout_servers)
        - set(rebooted_servers)
    )

    for server in expected_up:
        @chk.T(ServerTest, server=server, up_servers=up_servers)
        def _test_server_up(self, server, up_servers):
            """Check if server is up: {server}."""
            self.assertIn(server, up_servers)

        @chk.T(ServerTest, server=server)
        def _test_server_ssh(self, server):
            """Check if SSH port is open: {server}."""
            self.assertTrue(chk.telnet(server, 22))

    # TODO: implement test that for each partition sum of available capacity
    #       is not below partition threshold.

    return [LdapSyncTest, ServerTest]
def delete_n_servers(count, partition=None):
    """Delete the first ``count`` servers, by sorted name, of a partition.

    The hosts are removed through ``hostmanager.delete_hosts`` and their
    LDAP server records are deleted afterwards.
    """
    ipa_client = awscontext.GLOBAL.ipaclient
    ec2_conn = awscontext.GLOBAL.ec2

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    candidates = server_admin.list({
        'cell': context.GLOBAL.cell,
        'partition': partition,
    })

    # Victims are chosen in ascending hostname order.
    victims = sorted(entry['_id'] for entry in candidates)[:count]

    hostmanager.delete_hosts(
        ipa_client=ipa_client,
        ec2_conn=ec2_conn,
        hostnames=victims,
    )

    for name in victims:
        server_admin.delete(name)
def _state():
    """Summarise the current cell state.

    Returns a ``_STATE`` record holding the number of running apps (apps
    with a host), the number of pending apps (apps without one), the
    number of servers hosting at least one app, and the list of servers
    from LDAP that host none.
    """
    cellapis = context.GLOBAL.state_api()
    apps = restclient.get(cellapis, _STATE_URL).json()

    server_admin = admin.Server(context.GLOBAL.ldap.conn)

    running = 0
    pending = 0
    busy_servers = set()
    for app in apps:
        host = app['host']
        if not host:
            pending += 1
            continue
        running += 1
        busy_servers.add(host)

    _LOGGER.info('Apps: running: %s, pending: %s', running, pending)

    cell_servers = server_admin.list({'cell': context.GLOBAL.cell})
    idle_servers = {entry['_id'] for entry in cell_servers} - busy_servers

    _LOGGER.info(
        'Servers: busy: %s, idle: %s',
        len(busy_servers), len(idle_servers)
    )

    return _STATE(
        running=running,
        pending=pending,
        busy_srv_cnt=len(busy_servers),
        idle_servers=list(idle_servers)
    )
def sync_servers():
    """Register the cell's LDAP servers under placeholder buckets.

    Every server goes into the same ``rack:unknown`` bucket inside the
    ``building:unknown`` bucket, with no traits and no partition.
    """
    rack = 'rack:unknown'
    building = 'building:unknown'

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    for entry in server_admin.list({'cell': context.GLOBAL.cell}):
        servername = entry['_id']
        zkclient = context.GLOBAL.zk.conn

        master.create_bucket(zkclient, building, None)
        master.cell_insert_bucket(zkclient, building)
        master.create_bucket(zkclient, rack, building)
        master.create_server(zkclient, servername, rack)
        # A fresh traits list is passed for every server.
        master.update_server_attrs(zkclient, servername,
                                   traits=[], partition=None)
def configure(cell, features, server):
    """Create or update a server record, then display it.

    Only the options that were supplied are written; when none is given
    the record is only looked up and printed.
    """
    server_admin = admin.Server(context.GLOBAL.ldap.conn)

    changes = {}
    if cell:
        changes['cell'] = cell
    if features:
        changes['features'] = cli.combine(features)

    if changes:
        # Try to create first; fall back to update for an existing entry.
        try:
            server_admin.create(server, changes)
        except ldap3.LDAPEntryAlreadyExistsResult:
            server_admin.update(server, changes)

    try:
        cli.out(formatter(server_admin.get(server)))
    except ldap3.LDAPNoSuchObjectResult:
        click.echo('Server does not exist: %s' % server, err=True)
def _run_sync():
    """Sync Zookeeper with LDAP, runs with lock held.

    Loops forever.  Each pass syncs app groups, partitions, allocations
    and the global server list, runs the optional servers plugin, and
    then sleeps for 60 seconds.
    """
    while True:
        # App groups.
        app_groups = admin.AppGroup(context.GLOBAL.ldap.conn).list({})
        _sync_collection(context.GLOBAL.zk.conn, app_groups,
                         z.path.appgroup(), _match_appgroup)

        # Partitions of the current cell.
        cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
        _sync_partitions(context.GLOBAL.zk.conn,
                         cell_admin.partitions(context.GLOBAL.cell))

        # Allocations of the current cell.
        alloc_admin = admin.CellAllocation(context.GLOBAL.ldap.conn)
        _sync_allocations(context.GLOBAL.zk.conn,
                          alloc_admin.list({'cell': context.GLOBAL.cell}))

        # Global server list (all cells).
        everything = admin.Server(context.GLOBAL.ldap.conn).list({})
        zkutils.ensure_exists(
            context.GLOBAL.zk.conn,
            z.path.globals('servers'),
            data=[entry['_id'] for entry in everything])

        # Servers - because they can have custom topology - are loaded
        # from the plugin.
        try:
            plugin = importlib.import_module(
                'treadmill.plugins.sproc.servers')
            plugin.init()
        except ImportError as err:
            _LOGGER.warning(
                'Unable to load treadmill.plugins.sproc.servers: %s', err)

        time.sleep(60)
def _get_server_info():
    """Return the LDAP server records of the current cell."""
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell_filter = {'cell': context.GLOBAL.cell}
    return server_admin.list(cell_filter)
def test_get(self):
    """Smoke test for treadmill.api.server.get()."""
    hostname = 'foo.somewhere.in.xx.com'
    admin_double = admin.Server(None)

    self.svr.get(hostname)

    admin_double.get.assert_called_with(hostname)
def delete(servers):
    """Delete the LDAP record of each given server."""
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    for name in servers:
        server_admin.delete(name)
def create_n_servers(count, partition=None):
    """Create ``count`` new node hosts and register them in LDAP.

    Instance settings are read from the ``data`` attribute of the current
    cell's LDAP record.  Hosts are created one at a time; each created
    hostname is printed and stored as a server of the cell in
    ``partition``.
    """
    ipa_client = awscontext.GLOBAL.ipaclient
    ec2_conn = awscontext.GLOBAL.ec2
    sts_conn = awscontext.GLOBAL.sts
    ipa_domain = awscontext.GLOBAL.ipa_domain

    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    cell_admin = admin.Cell(context.GLOBAL.ldap.conn)
    cell_data = cell_admin.get(context.GLOBAL.cell)['data']

    image_id = cell_data['image']
    if not image_id.startswith('ami-'):
        # Not an AMI id: look the image up by name among the images owned
        # by the caller's account.
        account = sts_conn.get_caller_identity().get('Account')
        image = ec2client.get_image(ec2_conn, owners=[account],
                                    name=image_id)
        image_id = image['ImageId']

    instance_type = cell_data['size']
    subnets = cell_data['subnets']
    secgroup_id = cell_data['secgroup']
    hostgroups = cell_data['hostgroups']
    instance_profile = cell_data['instance_profile']
    disk_size = int(cell_data['disk_size'])

    # The literal '{time}' placeholder is left in the template.
    hostname_template = '{}-{}-{}'.format(
        context.GLOBAL.cell,
        partition or 'node',
        '{time}'
    )

    instance_vars = {
        'treadmill_cell': context.GLOBAL.cell,
        'treadmill_ldap': ','.join(context.GLOBAL.ldap.url),
        'treadmill_ldap_suffix': context.GLOBAL.ldap_suffix,
        'treadmill_dns_domain': context.GLOBAL.dns_domain,
        'treadmill_isa': 'node',
        'treadmill_profile': 'aws',
        'treadmill_krb_realm': krb5.get_host_realm(sysinfo.hostname())[0],
    }

    # FIXME: Add Partition: $partition to tags when Autoscaler is cell aware
    tags = [{'Key': 'Cell', 'Value': context.GLOBAL.cell}]

    for _ in range(count):
        created = hostmanager.create_host(
            ipa_client=ipa_client,
            ec2_conn=ec2_conn,
            image_id=image_id,
            count=1,
            disk=disk_size,
            domain=ipa_domain,
            key=None,
            secgroup_ids=secgroup_id,
            instance_type=instance_type,
            subnets=subnets,
            role='node',
            instance_vars=instance_vars,
            instance_profile=instance_profile,
            hostgroups=hostgroups,
            hostname=hostname_template,
            ip_address=None,
            eni=None,
            tags=tags
        )

        # Count is one, but it is more robust to treat it as list.
        for hostname in created:
            print(hostname)
            server_admin.create(hostname, {
                'cell': context.GLOBAL.cell,
                'partition': partition,
            })
def delete(hostname):
    """Remove the LDAP server record of ``hostname``."""
    _LOGGER.info('removing %s from LDAP', hostname)
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    server_admin.delete(hostname)
def list():
    """Return the set of ``_id`` values of all LDAP server records.

    No filtering is applied here; every record returned by an unfiltered
    LDAP server listing contributes its id.
    """
    _LOGGER.info('fetched server list from LDAP')
    server_admin = admin.Server(context.GLOBAL.ldap.conn)
    records = server_admin.list({})
    return {record.get("_id") for record in records}
def _admin_svr():
    """Build a server admin object on the global LDAP connection.

    Constructed on each call so the connection is only looked up when it
    is actually needed.
    """
    ldap_conn = context.GLOBAL.ldap.conn
    return admin.Server(ldap_conn)