def remove_container(self, instance_num):
    """Stop and remove the Docker container backing one instance.

    Returns silently when ``instance_num`` is not listed in
    ``self.containers['instances']``.

    Args:
        instance_num: Instance key within the group; it is appended to
            ``self.group_id`` to form the container name.

    Raises:
        RuntimeError: No entry of ``Sense.docker_hosts()`` matches the
            host recorded for the container.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        logging.info("Not removing container '%s', as it doesn't exist",
                     instance_id)
        return

    docker_host = containers['instances'][instance_num]['host']

    # The recorded host is compared both with the address part of each
    # Docker endpoint and with its Consul host name. The loop is not
    # interrupted, so the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Removing container '%s' from '%s'",
                 instance_id, docker_host)

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)
    docker_obj.stop(container=instance_id)
    docker_obj.remove_container(container=instance_id)
def disconnect_instance(self, instance_num):
    """Forcibly detach an instance's container from the configured network.

    The disconnect itself is best effort: a failure is logged and not
    propagated.

    Args:
        instance_num: Instance key within the group; it is appended to
            ``self.group_id`` to form the container name.

    Raises:
        RuntimeError: The network name is missing from
            ``Sense.network_settings()``, or no Docker host entry matches
            the host the instance is allocated on.
    """
    allocation = self.allocation
    instance_id = self.group_id + '_' + instance_num

    network_name = Sense.network_settings()['network_name']
    if not network_name:
        raise RuntimeError("Network name is not specified in settings")

    docker_host = allocation['instances'][instance_num]['host']

    # Match by the address part of the Docker endpoint or by the Consul
    # host name; the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    try:
        docker_obj.disconnect_container_from_network(instance_id,
                                                     network_name,
                                                     force=True)
    except Exception:
        # Deliberately non-fatal, but leave a trace instead of hiding the
        # failure; KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.warning("Failed to disconnect '%s' from network '%s'",
                        instance_id, network_name, exc_info=True)
def allocate_ip(skip=None):
    """Reserve and return the first free address of the configured subnet.

    An address is considered taken when it is a key of ``IP_CACHE``, is
    used by any instance of any blueprint in ``Sense.blueprints()``, is
    listed in ``skip``, or equals the configured gateway. Addresses whose
    textual form ends in ``.0`` are never handed out. The chosen address
    is recorded in ``IP_CACHE`` with the current timestamp.

    Args:
        skip: Optional iterable of address strings that must not be
            returned. It is only read, never modified.

    Returns:
        The allocated address as a string.

    Raises:
        RuntimeError: The subnet is not configured, or every address of
            the subnet is taken.
    """
    network_settings = Sense.network_settings()
    subnet = network_settings['subnet']
    gateway_ip = network_settings['gateway_ip']

    # Work on a private set: appending the gateway to the argument itself
    # would leak into later calls (shared default) and into the caller's
    # own list.
    excluded = set(skip) if skip is not None else set()
    if gateway_ip:
        excluded.add(gateway_ip)

    if not subnet:
        raise RuntimeError("Subnet is not specified in settings")

    invalidate_cache()

    # NOTE(review): the whole scan is kept under CACHE_LOCK so that the
    # check and the reservation happen atomically - confirm this matches
    # the intended lock scope.
    with CACHE_LOCK:
        excluded.update(IP_CACHE.keys())

        # collect instances from blueprints
        for blueprint in Sense.blueprints().values():
            for instance in blueprint['instances'].values():
                excluded.add(instance['addr'])

        for addr in ipaddress.ip_network(subnet):
            candidate = str(addr)
            if candidate in excluded or candidate.endswith('.0'):
                continue
            IP_CACHE[candidate] = datetime.datetime.now()
            return candidate

    raise RuntimeError('IP Address range exhausted')
def resize_instance(self, instance_num, memsize):
    """Change the memory setting of an instance and restart its container.

    Runs ``tarantool_set_config.lua TARANTOOL_SLAB_ALLOC_ARENA <value>``
    inside the container, where ``<value>`` is ``memsize / 1024`` as a
    float, and restarts the container when the command succeeds.
    Returns silently when ``instance_num`` is not listed in
    ``self.containers['instances']``.

    Args:
        instance_num: Instance key within the group.
        memsize: New memory size; logged as MiB.

    Raises:
        RuntimeError: The Docker host cannot be found, or the config
            command exits with a non-zero code.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        logging.info("Not resizing container '%s', as it doesn't exist",
                     instance_id)
        return

    docker_host = containers['instances'][instance_num]['host']

    # Match by the address part of the Docker endpoint or by the Consul
    # host name; the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Resizing container '%s' to %d MiB on '%s'",
                 instance_id, memsize, docker_host)

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    cmd = "tarantool_set_config.lua TARANTOOL_SLAB_ALLOC_ARENA " + \
          str(float(memsize)/1024)

    exec_id = docker_obj.exec_create(instance_id, cmd)
    docker_obj.exec_start(exec_id)
    ret = docker_obj.exec_inspect(exec_id)

    if ret['ExitCode'] != 0:
        raise RuntimeError("Failed to set memory size for container " +
                           instance_id)

    docker_obj.restart(container=instance_id)
def get_instance_password(self, instance_num):
    """Fetch an instance's ``auth.sasldb`` file, gzip-compressed and base64-encoded.

    The file is pulled from ``/opt/tarantool/auth.sasldb`` inside the
    container as a tar archive and unpacked in memory.

    Args:
        instance_num: Instance key within the group.

    Returns:
        The encoded file content as bytes; ``None`` when the instance is
        not listed in ``self.containers['instances']`` or when Docker
        reports ``NotFound`` for the archive request.

    Raises:
        RuntimeError: The Docker host cannot be found, or
            ``self.containers`` is empty.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        # The message is now actually formatted; previously the format
        # string and the id were passed as two separate exception args.
        raise RuntimeError("No such container: %s" % instance_id)

    docker_host = containers['instances'][instance_num]['host']

    # Match by the address part of the Docker endpoint or by the Consul
    # host name; the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Getting password for '%s' on '%s'",
                 instance_id, docker_host)

    docker_obj = docker.APIClient(base_url=docker_addr,
                                  tls=global_env.docker_tls_config)

    try:
        strm, _ = docker_obj.get_archive(
            instance_id, '/opt/tarantool/auth.sasldb')

        # Buffer the archive in memory so tarfile can seek in it.
        bio = io.BytesIO()
        shutil.copyfileobj(strm, bio)
        bio.seek(0)

        with tarfile.open(fileobj=bio) as tar:
            fobj = tar.extractfile('auth.sasldb')
            return base64.b64encode(gzip.compress(fobj.read()))
    except docker.errors.NotFound:
        return None
def register(self):
    """Register instance '1' of this group as a ``tarantino`` Consul service.

    The Consul agent to talk to is found by matching the host the
    instance is allocated on against ``Sense.docker_hosts()``. The service
    is registered on port 80 with the ``tarantool`` tag and a Docker
    ``/bin/true`` check.

    Raises:
        RuntimeError: No Docker host entry matches the allocated host.
    """
    instance_num = '1'
    blueprint = self.blueprint
    allocation = self.allocation
    instance_id = self.group_id + '_' + instance_num

    docker_host = allocation['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; when several match, the last one is used.
    consul_host = None
    for entry in Sense.docker_hosts():
        endpoint_ip = entry['addr'].split(':')[0]
        if endpoint_ip == docker_host or entry['consul_host'] == docker_host:
            consul_host = entry['consul_host']

    if not consul_host:
        raise RuntimeError("Failed to find consul host of %s" % docker_host)

    addr = blueprint['instances'][instance_num]['addr']
    check_period = blueprint['check_period']

    consul_obj = consul.Consul(host=consul_host,
                               token=global_env.consul_acl_token)

    interval = "%ds" % check_period

    def docker_check(script):
        # All checks share everything except the script they run.
        return {
            'docker_container_id': instance_id,
            'shell': "/bin/sh",
            'script': script,
            'interval': interval,
            'status': 'warning'
        }

    container_check = docker_check("/bin/true")

    # NOTE(review): the next two definitions are built but never passed
    # to Consul within this function - confirm whether they should be
    # registered.
    replication_check = docker_check(
        "/var/lib/mon.d/tarantool_replication.sh")
    memory_check = docker_check("/var/lib/mon.d/tarantool_memory.sh")

    logging.info("Registering instance '%s' on '%s'",
                 instance_id, consul_host)

    consul_obj.agent.service.register("tarantino",
                                      service_id=instance_id,
                                      address=addr,
                                      port=80,
                                      check=container_check,
                                      tags=['tarantool'])
def allocate(memory, anti_affinity=[]):
    """Choose the Docker host on which a new instance should be placed.

    Only hosts with status ``passing`` and the ``im`` tag are considered.
    Hosts outside ``anti_affinity`` rank first, then hosts with more free
    memory. The first ranked host whose free memory exceeds ``memory`` is
    chosen; when none qualifies, the top-ranked host is used anyway.

    Args:
        memory: Memory required by the new instance (logged as MiB).
        anti_affinity: Host addresses to avoid if possible. Read only.

    Returns:
        The address part (text before ``:``) of the chosen host's
        ``addr``.

    Raises:
        RuntimeError: There is no eligible Docker host.
    """
    def host_ip(node):
        return node['addr'].split(':')[0]

    docker_hosts = []
    for node in Sense.docker_hosts():
        if node['status'] == 'passing' and 'im' in node['tags']:
            docker_hosts.append(node)

    if not docker_hosts:
        raise RuntimeError("There are no healthy docker nodes")

    blueprints = Sense.blueprints()
    allocations = Sense.allocations()

    # Start every eligible host at zero, then add the blueprint memory of
    # each allocated instance to the host it runs on.
    memory_used = {}
    for node in docker_hosts:
        memory_used[host_ip(node)] = 0

    for group_id, blueprint in blueprints.items():
        if group_id in allocations:
            memsize = blueprint['memsize']
            for instance in allocations[group_id]['instances'].values():
                ip = instance['host'].split(':')[0]
                memory_used[ip] = memory_used.get(ip, 0) + memsize

    ranked = []
    for node in docker_hosts:
        ip = host_ip(node)
        free_mem = node['memory'] - memory_used[ip]
        preferred = 0 if ip in anti_affinity else 1
        ranked.append((preferred, free_mem, node))

    # Best candidates first; the host dict itself takes no part in the
    # comparison.
    ranked.sort(key=lambda entry: (entry[0], entry[1]), reverse=True)

    for _, free_mem, node in ranked:
        if free_mem > memory:
            ip = host_ip(node)
            logging.info("Allocating new instance with %d MiB memory at '%s'",
                         memory, ip)
            return ip

    fallback = host_ip(ranked[0][2])
    logging.info("There were no hosts with %d MiB of free memory, "
                 "so allocating instance on '%s'", memory, fallback)
    return fallback
def set_instance_password(self, instance_num, password):
    """Set the password of an instance by running a script in its container.

    Executes ``memcached_set_password.lua`` inside the container with
    ``password`` as its single argument. Returns silently when
    ``instance_num`` is not listed in ``self.containers['instances']``.

    Args:
        instance_num: Instance key within the group.
        password: Password handed verbatim to the script.

    Raises:
        RuntimeError: The Docker host cannot be found, or the script
            exits with a non-zero code.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        logging.info("Not setting password for '%s', as it doesn't exist",
                     instance_id)
        return

    docker_host = containers['instances'][instance_num]['host']

    # Match by the address part of the Docker endpoint or by the Consul
    # host name; the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Setting password for '%s' on '%s'",
                 instance_id, docker_host)

    docker_obj = docker.APIClient(base_url=docker_addr,
                                  tls=global_env.docker_tls_config)

    # Pass an argument vector rather than one concatenated string, so a
    # password containing spaces or quotes is not split into several
    # arguments.
    cmd = ["memcached_set_password.lua", password]

    exec_id = docker_obj.exec_create(instance_id, cmd)
    docker_obj.exec_start(exec_id)
    ret = docker_obj.exec_inspect(exec_id)

    if ret['ExitCode'] != 0:
        raise RuntimeError("Failed to set password for container " +
                           instance_id)
def update_config(self, instance_num, config_str):
    """Upload a new ``service.json`` into an instance's container and restart it.

    The config is wrapped with ``tar_string('service.json', config_str)``
    and put into ``/opt/tarantool`` of the container. Returns silently
    when ``instance_num`` is not listed in
    ``self.containers['instances']``.

    Args:
        instance_num: Instance key within the group.
        config_str: Content of the new config file.

    Raises:
        RuntimeError: The Docker host cannot be found, or the upload
            reports a falsy status.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        logging.info(
            "Not setting config for container '%s', as it doesn't exist",
            instance_id)
        return

    docker_host = containers['instances'][instance_num]['host']

    # Match by the address part of the Docker endpoint or by the Consul
    # host name; the last matching entry wins.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Uploading new config for container '%s' on '%s'",
                 instance_id, docker_host)

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    buf = io.BytesIO(tar_string('service.json', config_str))
    status = docker_obj.put_archive(instance_id, '/opt/tarantool', buf)

    if not status:
        raise RuntimeError("Failed to set config for container " +
                           instance_id)

    # Restart the container after a successful upload.
    docker_obj.restart(container=instance_id)
def update_config(self, instance_num, config_str):
    """Replace the ``service.json`` of an instance and restart its container.

    ``tar_string('service.json', config_str)`` is uploaded into
    ``/opt/tarantool`` of the container. Nothing happens when
    ``instance_num`` is not listed in ``self.containers['instances']``.

    Args:
        instance_num: Instance key within the group.
        config_str: Content of the new config file.

    Raises:
        RuntimeError: The Docker host cannot be found, or the upload
            reports a falsy status.
    """
    containers = self.containers
    if instance_num not in containers['instances']:
        return

    instance_id = self.group_id + '_' + instance_num

    if not containers:
        logging.info(
            "Not setting config for container '%s', as it doesn't exist",
            instance_id)
        return

    docker_host = containers['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; the loop keeps the last match.
    docker_addr = None
    for host in Sense.docker_hosts():
        if host['addr'].split(':')[0] == docker_host or \
           host['consul_host'] == docker_host:
            docker_addr = host['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    logging.info("Uploading new config for container '%s' on '%s'",
                 instance_id, docker_host)

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    buf = io.BytesIO(tar_string('service.json', config_str))
    status = docker_obj.put_archive(instance_id, '/opt/tarantool', buf)

    if not status:
        raise RuntimeError("Failed to set config for container " +
                           instance_id)

    # Restart the container after a successful upload.
    docker_obj.restart(container=instance_id)
def allocate(memory, anti_affinity=[]):
    """Select a Docker host for a new instance.

    Candidates are the hosts reported by ``Sense.docker_hosts()`` with
    status ``passing`` and the ``im`` tag. They are ordered so that hosts
    not listed in ``anti_affinity`` come first and, within that, hosts
    with more free memory come first. The first candidate with more than
    ``memory`` free is returned; otherwise the first candidate overall.

    Args:
        memory: Memory required by the new instance (logged as MiB).
        anti_affinity: Host addresses to avoid if possible. Read only.

    Returns:
        The address part (text before ``:``) of the chosen host's
        ``addr``.

    Raises:
        RuntimeError: There is no eligible Docker host.
    """
    candidates = [
        node for node in Sense.docker_hosts()
        if node['status'] == 'passing' and 'im' in node['tags']
    ]
    if not candidates:
        raise RuntimeError("There are no healthy docker nodes")

    blueprints = Sense.blueprints()
    allocations = Sense.allocations()

    # Memory already promised on each host, keyed by address without port.
    used_by_host = dict.fromkeys(
        (node['addr'].split(':')[0] for node in candidates), 0)

    for group_id, blueprint in blueprints.items():
        if group_id not in allocations:
            continue
        group_memsize = blueprint['memsize']
        placed = allocations[group_id]['instances']
        for placement in placed.values():
            placed_on = placement['host'].split(':')[0]
            used_by_host[placed_on] = \
                used_by_host.get(placed_on, 0) + group_memsize

    def score(node):
        node_addr = node['addr'].split(':')[0]
        remaining = node['memory'] - used_by_host[node_addr]
        avoid = node_addr in anti_affinity
        return (0 if avoid else 1, remaining, node)

    def rank(scored):
        # Only affinity and free memory decide the order.
        return scored[:2]

    ordered = sorted([score(node) for node in candidates],
                     key=rank, reverse=True)

    for scored in ordered:
        chosen_addr = scored[2]['addr'].split(':')[0]
        if scored[1] > memory:
            logging.info("Allocating new instance with %d MiB memory at '%s'",
                         memory, chosen_addr)
            return chosen_addr

    # Nobody has enough room: fall back to the best-ranked host.
    best_addr = ordered[0][2]['addr'].split(':')[0]
    logging.info(
        "There were no hosts with %d MiB of free memory, "
        "so allocating instance on '%s'", memory, best_addr)
    return best_addr
def enable_replication(self):
    """Configure every instance of the group to replicate from its peers.

    For each allocated instance, runs ``tarantool_set_config.lua
    TARANTOOL_REPLICATION_SOURCE <comma-separated peer addrs>`` inside
    its container. The command is tried up to five times, sleeping one
    second after each failed attempt.

    Raises:
        RuntimeError: An instance's Docker host cannot be found, or the
            command still fails on the fifth attempt.
    """
    max_attempts = 5
    blueprint = self.blueprint
    allocation = self.allocation

    for instance_num in allocation['instances']:
        instance_id = self.group_id + '_' + instance_num
        other_instances = \
            set(allocation['instances'].keys()) - {instance_num}
        addr = blueprint['instances'][instance_num]['addr']
        other_addrs = [blueprint['instances'][i]['addr']
                       for i in other_instances]
        docker_host = allocation['instances'][instance_num]['host']
        docker_hosts = Sense.docker_hosts()

        logging.info("Enabling replication between '%s' and '%s'",
                     addr, str(other_addrs))

        # Match by the address part of the Docker endpoint or by the
        # Consul host name; the last matching entry wins.
        docker_addr = None
        for host in docker_hosts:
            if host['addr'].split(':')[0] == docker_host or \
               host['consul_host'] == docker_host:
                docker_addr = host['addr']

        if not docker_addr:
            # Fail loudly instead of handing base_url=None to the client.
            raise RuntimeError("No such Docker host: '%s'" % docker_host)

        docker_obj = docker.Client(base_url=docker_addr,
                                   tls=global_env.docker_tls_config)

        cmd = "tarantool_set_config.lua TARANTOOL_REPLICATION_SOURCE " + \
              ",".join(other_addrs)

        for _ in range(max_attempts):
            exec_id = docker_obj.exec_create(instance_id, cmd)
            stream = docker_obj.exec_start(exec_id, stream=True)
            for line in stream:
                logging.info("Exec: %s", str(line))

            ret = docker_obj.exec_inspect(exec_id)
            if ret['ExitCode'] == 0:
                break

            time.sleep(1)
        else:
            # Reached only when no attempt succeeded.
            raise RuntimeError("Failed to enable replication for group " +
                               self.group_id)
def wait_for_instances(self, wait_task):
    """Block until every allocated instance reports that it is up.

    For each instance, ``tarantool_is_up`` is executed inside its
    container once per second until it exits with code 0. There is no
    upper bound on the number of attempts.

    Args:
        wait_task: Object whose ``log(fmt, *args)`` method receives the
            progress messages.

    Raises:
        RuntimeError: An instance's Docker host cannot be found.
    """
    allocation = self.allocation

    for instance_num in allocation['instances']:
        docker_host = allocation['instances'][instance_num]['host']
        docker_hosts = Sense.docker_hosts()
        instance_id = self.group_id + '_' + instance_num

        wait_task.log(
            "Waiting for '%s' to go up. It may take time to " +
            "load data from disk.", instance_id)

        # Match by the address part of the Docker endpoint or by the
        # Consul host name; the last matching entry wins.
        docker_addr = None
        for host in docker_hosts:
            if host['addr'].split(':')[0] == docker_host or \
               host['consul_host'] == docker_host:
                docker_addr = host['addr']

        if not docker_addr:
            # Fail loudly instead of handing base_url=None to the client.
            raise RuntimeError("No such Docker host: '%s'" % docker_host)

        docker_obj = docker.APIClient(base_url=docker_addr,
                                      tls=global_env.docker_tls_config)

        cmd = "tarantool_is_up"

        attempts = 0
        while True:
            exec_id = docker_obj.exec_create(instance_id, cmd)
            stream = docker_obj.exec_start(exec_id, stream=True)
            for line in stream:
                logging.info("Exec: %s", str(line))

            ret = docker_obj.exec_inspect(exec_id)
            if ret['ExitCode'] == 0:
                break

            wait_task.log("Waiting for '%s' to go up. Attempt %d.",
                          instance_id, attempts)
            time.sleep(1)
            attempts += 1
def unregister_instance(self, instance_num):
    """Deregister an instance's checks and service from its Consul agent.

    Returns silently when ``instance_num`` is not listed in
    ``self.services['instances']``. Deregistration is also skipped,
    without any message, when the instance's Consul host is not among
    the hosts of ``Sense.consul_hosts()`` with status ``passing``.

    Args:
        instance_num: Instance key within the group.

    Raises:
        RuntimeError: No Docker host entry matches the host the instance
            is allocated on.
    """
    services = self.services
    allocation = self.allocation

    if instance_num not in services['instances']:
        return

    instance_id = self.group_id + '_' + instance_num
    docker_host = allocation['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; when several match, the last one is used.
    consul_host = None
    for entry in Sense.docker_hosts():
        endpoint_ip = entry['addr'].split(':')[0]
        if endpoint_ip == docker_host or entry['consul_host'] == docker_host:
            consul_host = entry['consul_host']

    if not consul_host:
        raise RuntimeError("Failed to find consul host of %s" % docker_host)

    passing_consul_hosts = []
    for entry in Sense.consul_hosts():
        if entry['status'] == 'passing':
            passing_consul_hosts.append(entry['addr'].split(':')[0])

    # NOTE(review): the "not registered" branch is assumed to pair with
    # the emptiness test on services - confirm against the original
    # layout.
    if not services:
        logging.info("Not unregistering '%s', as it's not registered",
                     instance_id)
        return

    if consul_host not in passing_consul_hosts:
        return

    consul_obj = consul.Consul(host=consul_host,
                               token=global_env.consul_acl_token)

    check_id = instance_id + '_memory'
    logging.info("Unregistering check '%s'", check_id)
    consul_obj.agent.check.deregister(check_id)
    consul_obj.agent.check.deregister('service:' + instance_id)

    logging.info("Unregistering instance '%s' from '%s'",
                 instance_id, consul_host)
    consul_obj.agent.service.deregister(instance_id)
def upgrade_container(self, instance_num):
    """Recreate an instance's container while keeping its mounts.

    The existing container is inspected for its mounts, stopped and
    removed. A new container with the same name is then created from the
    ``tarantool-cloud-memcached`` image with those mounts bound again
    (except ``/opt/tarantool``), attached to the configured network with
    the blueprint address, and started. Instance ``'2'`` is given
    instance ``'1'`` as its replication source.

    Args:
        instance_num: Instance key within the group.

    Raises:
        RuntimeError: The network name is not configured, or the Docker
            host cannot be found.
    """
    group_id = self.group_id
    logging.info("Upgrading container '%s'", group_id)

    blueprint = self.blueprint
    allocation = self.allocation
    instance_id = self.group_id + '_' + instance_num
    addr = blueprint['instances'][instance_num]['addr']
    memsize = blueprint['memsize']

    network_name = Sense.network_settings()['network_name']
    if not network_name:
        raise RuntimeError("Network name is not specified in settings")

    docker_host = allocation['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; when several match, the last one is used.
    docker_addr = None
    for entry in Sense.docker_hosts():
        endpoint_ip = entry['addr'].split(':')[0]
        if endpoint_ip == docker_host or entry['consul_host'] == docker_host:
            docker_addr = entry['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    if instance_num == '2':
        replica_ip = blueprint['instances']['1']['addr']
    else:
        replica_ip = None

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    self.ensure_image(docker_addr)
    self.ensure_network(docker_addr)

    binds = []
    for mount in docker_obj.inspect_container(instance_id)["Mounts"]:
        destination = mount['Destination']
        if destination == '/opt/tarantool':
            # code should be upgraded along with container
            continue
        source = mount["Source"]
        logging.info("Keeping mount %s:%s", source, destination)
        mode = "rw" if mount['RW'] else "ro"
        binds.append("%s:%s:%s" % (source, destination, mode))

    docker_obj.stop(container=instance_id)
    docker_obj.remove_container(container=instance_id)

    restart_policy = {
        "MaximumRetryCount": 0,
        "Name": "unless-stopped"
    }
    host_config = docker_obj.create_host_config(
        restart_policy=restart_policy,
        binds=binds)

    cmd = 'tarantool /opt/tarantool/app.lua'

    endpoint = {
        'IPAMConfig': {
            "IPv4Address": addr,
            "IPv6Address": ""
        },
        "Links": [],
        "Aliases": []
    }
    networking_config = {'EndpointsConfig': {network_name: endpoint}}

    environment = {'TARANTOOL_SLAB_ALLOC_ARENA': float(memsize)/1024}
    if replica_ip:
        environment['TARANTOOL_REPLICATION_SOURCE'] = replica_ip + ':3301'

    container = docker_obj.create_container(
        image='tarantool-cloud-memcached',
        name=instance_id,
        command=cmd,
        host_config=host_config,
        networking_config=networking_config,
        environment=environment,
        labels=['tarantool'])

    container_id = container.get('Id')
    docker_obj.connect_container_to_network(container_id,
                                            network_name,
                                            ipv4_address=addr)
    docker_obj.start(container=container_id)
def create_container(self, instance_num, other_instance_num,
                     password, password_base64):
    """Create and start the container of one memcached instance.

    The container is created from the ``tarantool-cloud-memcached``
    image on the host the instance is allocated on, attached to the
    configured network with the blueprint address, and started.

    Args:
        instance_num: Instance key within the group.
        other_instance_num: Key of the instance to replicate from, or
            ``None`` for no replication source.
        password: If truthy, exported as ``MEMCACHED_PASSWORD``.
        password_base64: If truthy, exported as
            ``MEMCACHED_PASSWORD_BASE64``.

    Raises:
        RuntimeError: The network name is not configured, or the Docker
            host cannot be found.
    """
    blueprint = self.blueprint
    allocation = self.allocation
    instance_id = self.group_id + '_' + instance_num
    addr = blueprint['instances'][instance_num]['addr']
    memsize = blueprint['memsize']

    network_name = Sense.network_settings()['network_name']
    if not network_name:
        raise RuntimeError("Network name is not specified in settings")

    docker_host = allocation['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; when several match, the last one is used.
    docker_addr = None
    for entry in Sense.docker_hosts():
        endpoint_ip = entry['addr'].split(':')[0]
        if endpoint_ip == docker_host or entry['consul_host'] == docker_host:
            docker_addr = entry['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    if other_instance_num is None:
        replica_ip = None
    else:
        replica_ip = blueprint['instances'][other_instance_num]['addr']

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    self.ensure_image(docker_addr)
    self.ensure_network(docker_addr)

    if replica_ip:
        logging.info("Creating memcached '%s' on '%s' with ip '%s'"
                     " and replication source: '%s'",
                     instance_id, docker_obj.base_url, addr, replica_ip)
    else:
        logging.info("Creating memcached '%s' on '%s' with ip '%s'",
                     instance_id, docker_obj.base_url, addr)

    restart_policy = {
        "MaximumRetryCount": 0,
        "Name": "unless-stopped"
    }
    host_config = docker_obj.create_host_config(
        restart_policy=restart_policy)

    cmd = 'tarantool /opt/tarantool/app.lua'

    endpoint = {
        'IPAMConfig': {
            "IPv4Address": addr,
            "IPv6Address": ""
        },
        "Links": [],
        "Aliases": []
    }
    networking_config = {'EndpointsConfig': {network_name: endpoint}}

    environment = {'TARANTOOL_SLAB_ALLOC_ARENA': float(memsize)/1024}
    if password:
        environment['MEMCACHED_PASSWORD'] = password
    if password_base64:
        environment['MEMCACHED_PASSWORD_BASE64'] = password_base64
    if replica_ip:
        environment['TARANTOOL_REPLICATION_SOURCE'] = replica_ip + ':3301'

    container = docker_obj.create_container(
        image='tarantool-cloud-memcached',
        name=instance_id,
        command=cmd,
        host_config=host_config,
        networking_config=networking_config,
        environment=environment,
        labels=['tarantool'])

    container_id = container.get('Id')
    docker_obj.connect_container_to_network(container_id,
                                            network_name,
                                            ipv4_address=addr)
    docker_obj.start(container=container_id)
def restore(self, backup_id, storage, restore_task):
    """Restore instances '1' and '2' of the group from a stored backup.

    For each instance the archive is unpacked into a temporary directory
    inside the container, the existing ``*.snap`` and ``*.xlog`` files in
    ``/var/lib/tarantool`` are deleted, the restored files are moved in
    place, and the container is restarted. Afterwards the method waits
    for the instances and re-enables replication.

    Failures after the backup lookup are not raised: they are logged and
    reported through ``restore_task.set_status(task.STATUS_CRITICAL, ...)``.

    Args:
        backup_id: Key of the backup in ``Sense.backups()``.
        storage: Object providing ``get_archive(archive_id)``.
        restore_task: Task object used for progress logging and status.
    """
    blueprint = self.blueprint
    group_id = self.group_id

    restore_task.log("Restoring group '%s'", group_id)

    backup = Sense.backups()[backup_id]
    archive_id = backup['archive_id']
    mem_used = backup['mem_used']

    try:
        # The limit is the same for both instances, so check it once
        # before touching any container.
        if mem_used > blueprint['memsize']:
            err = ("Backed up instance used {} MiB of RAM, but " +
                   "instance {} only has {} MiB max").format(
                       mem_used, group_id, blueprint['memsize'])
            restore_task.set_status(task.STATUS_CRITICAL, err)
            return

        for instance_num in ('1', '2'):
            allocation = self.allocation
            instance_id = self.group_id + '_' + instance_num
            docker_host = allocation['instances'][instance_num]['host']
            docker_hosts = Sense.docker_hosts()

            restore_task.log("Restoring instance: '%s'", instance_id)

            # Match by the address part of the Docker endpoint or by the
            # Consul host name; the last matching entry wins.
            docker_addr = None
            for host in docker_hosts:
                if host['addr'].split(':')[0] == docker_host or \
                   host['consul_host'] == docker_host:
                    docker_addr = host['addr']

            if not docker_addr:
                raise RuntimeError("No such Docker host: '%s'" % docker_host)

            docker_obj = docker.Client(base_url=docker_addr,
                                       tls=global_env.docker_tls_config)

            def run_checked(cmd, failure):
                # Run cmd in the current container; raise with the
                # command output when it exits with a non-zero code.
                exec_id = docker_obj.exec_create(instance_id, cmd)
                out = docker_obj.exec_start(exec_id)
                ret = docker_obj.exec_inspect(exec_id)
                if ret['ExitCode'] != 0:
                    raise RuntimeError(
                        failure + instance_id + ": " + out.decode('utf-8'))

            tmp_restore_dir = \
                '/var/lib/tarantool/restore-' + uuid.uuid4().hex

            run_checked("mkdir '%s'" % tmp_restore_dir,
                        "Failed to create temp restore dir for container ")

            stream = storage.get_archive(archive_id)
            docker_obj.put_archive(instance_id, tmp_restore_dir, stream)

            # The commands below need a shell for glob expansion.
            run_checked("sh -c 'rm -rf /var/lib/tarantool/*.snap'",
                        "Failed to remove existing snap files of ")
            run_checked("sh -c 'rm -rf /var/lib/tarantool/*.xlog'",
                        "Failed to remove existing xlog files of ")
            run_checked(
                "sh -c 'mv %s/* /var/lib/tarantool'" % tmp_restore_dir,
                "Failed to restore files of ")

            run_checked("rm -rf '%s'" % tmp_restore_dir,
                        "Failed to remove tmp restore dir of ")

            restore_task.log("Restarting instance: '%s'", instance_id)
            docker_obj.restart(container=instance_id)

        restore_task.log("Enabling replication")
        self.wait_for_instances(restore_task)
        self.enable_replication()
    except Exception as ex:
        logging.exception("Failed to restore backup '%s'", group_id)
        restore_task.set_status(task.STATUS_CRITICAL, str(ex))
def backup(self, backup_task, storage):
    """Back up the data files of instance '1' into ``storage``.

    The latest ``.snap`` file of ``/var/lib/tarantool`` is saved together
    with the last ``.xlog`` whose name sorts at or before it and every
    later ``.xlog``. The files are hard-linked into a temporary directory
    inside the container, fetched as one archive, stored, and registered
    as a ``memcached`` backup.

    Failures are not raised: they are logged and reported through
    ``backup_task.set_status(task.STATUS_CRITICAL, ...)``.

    Args:
        backup_task: Task object providing ``backup_id``, ``log`` and
            ``set_status``.
        storage: Object providing ``put_archive`` and ``register_backup``.
    """
    # Bound before the try block so the except handler can always use it.
    group_id = self.group_id

    try:
        services = self.services
        backup_id = backup_task.backup_id

        backup_task.log("Backing up group '%s'", group_id)

        instance_num = '1'
        allocation = self.allocation
        instance_id = group_id + '_' + instance_num
        docker_host = allocation['instances'][instance_num]['host']

        # Match by the address part of the Docker endpoint or by the
        # Consul host name; the last matching entry wins.
        docker_addr = None
        for host in Sense.docker_hosts():
            if host['addr'].split(':')[0] == docker_host or \
               host['consul_host'] == docker_host:
                docker_addr = host['addr']

        if not docker_addr:
            raise RuntimeError("No such Docker host: '%s'" % docker_host)

        docker_obj = docker.Client(base_url=docker_addr,
                                   tls=global_env.docker_tls_config)

        def run(cmd):
            # Run cmd in the container; return (exit code, raw output).
            exec_id = docker_obj.exec_create(instance_id, cmd)
            output = docker_obj.exec_start(exec_id)
            exit_code = docker_obj.exec_inspect(exec_id)['ExitCode']
            return exit_code, output

        exit_code, out = run('ls /var/lib/tarantool')
        if exit_code != 0:
            raise RuntimeError("Failed to list snapshots for container " +
                               instance_id)

        files = out.decode('utf-8').split('\n')
        snapshot_lsns = sorted(os.path.splitext(f)[0]
                               for f in files if f.endswith('.snap'))
        xlog_lsns = sorted(os.path.splitext(f)[0]
                           for f in files if f.endswith('.xlog'))

        if not snapshot_lsns:
            raise RuntimeError("There are no snapshots to backup")

        # File names are compared as strings.
        latest_snapshot_lsn = snapshot_lsns[-1]
        older_xlogs = [x for x in xlog_lsns if x <= latest_snapshot_lsn]
        newer_xlogs = [x for x in xlog_lsns if x > latest_snapshot_lsn]

        if not older_xlogs:
            # Previously surfaced as a bare "list index out of range".
            raise RuntimeError(
                "There is no xlog at or before the latest snapshot '%s'"
                % latest_snapshot_lsn)

        xlogs_to_backup = [older_xlogs[-1]] + newer_xlogs

        files_to_backup = [latest_snapshot_lsn + '.snap']
        files_to_backup += [xlog + '.xlog' for xlog in xlogs_to_backup]

        backup_task.log("Backing up data: %s", ', '.join(files_to_backup))

        tmp_backup_dir = '/var/lib/tarantool/backup-' + uuid.uuid4().hex

        exit_code, out = run("mkdir '%s'" % tmp_backup_dir)
        if exit_code != 0:
            raise RuntimeError(
                "Failed to create temp backup dir for container " +
                instance_id)

        for file_to_backup in files_to_backup:
            exit_code, out = run("ln /var/lib/tarantool/%s %s/%s" % (
                file_to_backup, tmp_backup_dir, file_to_backup))
            if exit_code != 0:
                raise RuntimeError(
                    "Failed to hardlink backup file: " + out.decode('utf-8'))

        strm, _ = docker_obj.get_archive(instance_id, tmp_backup_dir+'/.')
        archive_id, size = storage.put_archive(strm)

        # The glob must be expanded by a shell; executed directly, rm
        # would receive the literal pattern and remove nothing.
        exit_code, out = run("sh -c 'rm -rf /var/lib/tarantool/backup-*'")
        if exit_code != 0:
            raise RuntimeError(
                "Failed to remove temp backup dir for container " +
                instance_id)

        mem_used = services['instances'][instance_num]['mem_used']
        storage.register_backup(backup_id, archive_id, group_id,
                                'memcached', size, mem_used)

        Sense.update()

        backup_task.set_status(task.STATUS_SUCCESS)
    except Exception as ex:
        logging.exception("Failed to backup '%s'", group_id)
        backup_task.set_status(task.STATUS_CRITICAL, str(ex))
def create_containers(self, password):
    """Create and start the ``tarantool/tarantino`` container of instance '1'.

    The container is created on the host the instance is allocated on,
    attached to the configured network with the blueprint address, and
    started.

    Args:
        password: If truthy, exported to the container as
            ``MEMCACHED_PASSWORD``.

    Raises:
        RuntimeError: The network name is not configured, or the Docker
            host cannot be found.
    """
    instance_num = '1'
    blueprint = self.blueprint
    allocation = self.allocation
    instance_id = self.group_id + '_' + instance_num
    addr = blueprint['instances'][instance_num]['addr']
    memsize = blueprint['memsize']

    network_name = Sense.network_settings()['network_name']
    if not network_name:
        raise RuntimeError("Network name is not specified in settings")

    docker_host = allocation['instances'][instance_num]['host']

    # An entry matches by the address part of its Docker endpoint or by
    # its Consul host name; when several match, the last one is used.
    docker_addr = None
    for entry in Sense.docker_hosts():
        endpoint_ip = entry['addr'].split(':')[0]
        if endpoint_ip == docker_host or entry['consul_host'] == docker_host:
            docker_addr = entry['addr']

    if not docker_addr:
        raise RuntimeError("No such Docker host: '%s'" % docker_host)

    # NOTE(review): instance_num is fixed to '1' above, so the replica
    # branch below cannot trigger in this function.
    if instance_num == '2':
        replica_ip = blueprint['instances']['1']['addr']
    else:
        replica_ip = None

    docker_obj = docker.Client(base_url=docker_addr,
                               tls=global_env.docker_tls_config)

    self.ensure_image(docker_addr)
    self.ensure_network(docker_addr)

    if replica_ip:
        logging.info("Creating tarantino '%s' on '%s' with ip '%s'"
                     " and replication source: '%s'",
                     instance_id, docker_obj.base_url, addr, replica_ip)
    else:
        logging.info("Creating tarantino '%s' on '%s' with ip '%s'",
                     instance_id, docker_obj.base_url, addr)

    restart_policy = {
        "MaximumRetryCount": 0,
        "Name": "unless-stopped"
    }
    host_config = docker_obj.create_host_config(
        restart_policy=restart_policy)

    endpoint = {
        'IPAMConfig': {
            "IPv4Address": addr,
            "IPv6Address": ""
        },
        "Links": [],
        "Aliases": []
    }
    networking_config = {'EndpointsConfig': {network_name: endpoint}}

    environment = {'TARANTOOL_SLAB_ALLOC_ARENA': float(memsize)/1024}
    if password:
        environment['MEMCACHED_PASSWORD'] = password

    container = docker_obj.create_container(
        image='tarantool/tarantino',
        name=instance_id,
        host_config=host_config,
        networking_config=networking_config,
        environment=environment,
        labels=['tarantool'])

    container_id = container.get('Id')
    docker_obj.connect_container_to_network(container_id,
                                            network_name,
                                            ipv4_address=addr)
    docker_obj.start(container=container_id)