def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports, base_dir):
    """
    Extends a cluster to a given new node
    """
    logger.debug("Extending cluster {0} from {1} to {2}".format(cluster_name, master_ip, new_ip))
    client = SSHClient(master_ip)
    config = ArakoonClusterConfig(cluster_name)
    config.load_config(client)
    client = SSHClient(new_ip)
    base_dir = base_dir.rstrip("/")
    port_range = client.config_read("ovs.ports.arakoon")
    ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    node_name = System.get_my_machine_id(client)
    if not [node.name for node in config.nodes if node.name == node_name]:
        config.nodes.append(ArakoonNodeConfig(name=node_name, ip=new_ip, client_port=ports[0], messaging_port=ports[1],
                                              log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                              home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                                              tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)))
    ArakoonInstaller._deploy(config)
    logger.debug("Extending cluster {0} from {1} to {2} completed".format(cluster_name, master_ip, new_ip))
    return {"client_port": ports[0], "messaging_port": ports[1]}

def create_cluster(cluster_name, ip, exclude_ports, base_dir, plugins=None):
    """
    Creates a cluster
    """
    logger.debug("Creating cluster {0} on {1}".format(cluster_name, ip))
    client = SSHClient(ip)
    base_dir = base_dir.rstrip("/")
    port_range = client.config_read("ovs.ports.arakoon")
    ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    node_name = System.get_my_machine_id(client)
    config = ArakoonClusterConfig(cluster_name, plugins)
    if not [node.name for node in config.nodes if node.name == node_name]:
        config.nodes.append(ArakoonNodeConfig(name=node_name, ip=ip, client_port=ports[0], messaging_port=ports[1],
                                              log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                              home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                                              tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)))
    ArakoonInstaller._deploy(config)
    logger.debug("Creating cluster {0} on {1} completed".format(cluster_name, ip))
    return {"client_port": ports[0], "messaging_port": ports[1]}

def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports):
    """
    Extends a cluster to a given new node
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    client = SSHClient(master_ip)
    config = ArakoonClusterConfig(cluster_name)
    config.load_config(client)
    client = SSHClient(new_ip)
    base_dir = client.config_read('ovs.arakoon.location').rstrip('/')
    port_range = client.config_read('ovs.ports.arakoon')
    ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    node_name = System.get_my_machine_id(client)
    if not [node.name for node in config.nodes if node.name == node_name]:
        config.nodes.append(ArakoonNodeConfig(name=node_name, ip=new_ip, client_port=ports[0], messaging_port=ports[1],
                                              log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                              home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                                              tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)))
    ArakoonInstaller._deploy(config)
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def status(cluster_name, ip):
    client = SSHClient.load(ip)
    cmd = """
from ovs.plugin.provider.service import Service
print Service.get_service_status('arakoon-{0}')
""".format(cluster_name)
    System.exec_remote_python(client, cmd)

def create_cluster(cluster_name, ip, exclude_ports, plugins=None):
    """
    Creates a cluster
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    client = SSHClient(ip)
    base_dir = client.config_read('ovs.arakoon.location').rstrip('/')
    port_range = client.config_read('ovs.ports.arakoon')
    ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    node_name = System.get_my_machine_id(client)
    config = ArakoonClusterConfig(cluster_name, plugins)
    if not [node.name for node in config.nodes if node.name == node_name]:
        config.nodes.append(ArakoonNodeConfig(name=node_name, ip=ip, client_port=ports[0], messaging_port=ports[1],
                                              log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                                              home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                                              tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)))
    ArakoonInstaller._deploy(config)
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def _get_free_ports(client):
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except:
                logger.error('  Could not load port information of cluster {0}'.format(cluster_name))
    ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)), exclude_ports, 2, client)
    logger.debug('  Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports

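# The helpers in this section lean on System.get_free_ports() to pick ports that are neither in use on the
# host nor already claimed by other Arakoon clusters. Below is a minimal stand-alone sketch of that idea,
# not the actual OVS implementation: it bind-tests candidate ports and skips an exclusion list. The name
# find_free_ports and the loopback bind are illustrative assumptions.
import socket

def find_free_ports(port_range, exclude, amount):
    """Return `amount` ports from `port_range` that are free and not in `exclude` (illustrative sketch)."""
    found = []
    for port in port_range:
        if port in exclude or port in found:
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('127.0.0.1', port))  # binding only succeeds when the port is currently unused
        except socket.error:
            continue
        finally:
            sock.close()
        found.append(port)
        if len(found) == amount:
            return found
    raise RuntimeError('Not enough free ports available in the given range')
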
def catchup_cluster_node(cluster_name, ip):
    client = SSHClient.load(ip)
    cmd = """
from ovs.extensions.db.arakoon.ArakoonManagement import ArakoonManagementEx
cluster = ArakoonManagementEx().getCluster('{0}')
cluster.catchup_node()
""".format(cluster_name)
    System.exec_remote_python(client, cmd)

def delete_dir_structure(self, client=None, cluster_name=None):
    if cluster_name is None:
        cluster_name = self.config.cluster_name
    cmd = """
rm -rf {0}/arakoon/{1}
rm -rf {0}/tlogs/{1}
rm -rf /var/log/arakoon/{1}
""".format(self.config.base_dir, cluster_name)
    System.run(cmd, client)

def create_dir_structure(self, client=None, cluster_name=None):
    if cluster_name is None:
        cluster_name = self.config.cluster_name
    cmd = """
mkdir -p {0}/arakoon/{1}
mkdir -p {0}/tlogs/{1}
mkdir -p /var/log/arakoon/{1}
""".format(self.config.base_dir, cluster_name)
    System.run(cmd, client)

class Helper(object):
    """
    Helper module
    """
    MODULE = "utils"
    SETTINGS_LOC = "/opt/OpenvStorage/config/healthcheck/settings.json"
    RAW_INIT_MANAGER = str(subprocess.check_output('cat /proc/1/comm', shell=True)).strip()
    LOCAL_SR = System.get_my_storagerouter()
    LOCAL_ID = System.get_my_machine_id()

    with open(SETTINGS_LOC) as settings_file:
        settings = json.load(settings_file)

    debug_mode = settings["healthcheck"]["debug_mode"]
    enable_logging = settings["healthcheck"]["logging"]["enable"]
    max_log_size = settings["healthcheck"]["max_check_log_size"]
    packages = settings["healthcheck"]["package_list"]
    extra_ports = settings["healthcheck"]["extra_ports"]
    rights_dirs = settings["healthcheck"]["rights_dirs"]
    owners_files = settings["healthcheck"]["owners_files"]
    max_hours_zero_disk_safety = settings["healthcheck"]["max_hours_zero_disk_safety"]

    @staticmethod
    def get_healthcheck_version():
        """
        Gets the installed healthcheck version
        :return: version number of the installed healthcheck
        :rtype: str
        """
        client = SSHClient(System.get_my_storagerouter())
        package_name = 'openvstorage-health-check'
        package_manager = PackageFactory.get_manager()
        packages = package_manager.get_installed_versions(client=client, package_names=[package_name])
        return packages.get(package_name, 'unknown')

    @staticmethod
    def get_local_settings():
        """
        Fetch settings of the local Open vStorage node
        :return: local settings of the node
        :rtype: dict
        """
        # Fetch all details
        local_settings = {'cluster_id': Configuration.get("/ovs/framework/cluster_id"),
                          'hostname': socket.gethostname(),
                          'storagerouter_id': Helper.LOCAL_ID,
                          'storagerouter_type': Helper.LOCAL_SR.node_type,
                          'environment os': ' '.join(platform.linux_distribution())}
        return local_settings

def create_cluster(cluster_name, ip, exclude_ports, plugins=None):
    ai = ArakoonInstaller()
    ai.clear_config()
    client = SSHClient.load(ip)
    port_range = System.read_remote_config(client, 'ovs.ports.arakoon')
    free_ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    ai.create_config(cluster_name, ip, free_ports[0], free_ports[1], plugins)
    ai.generate_configs(client)
    ai.create_dir_structure(client)
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}

def extend_cluster(src_ip, tgt_ip, cluster_name, exclude_ports):
    ai = ArakoonInstaller()
    ai.load_config_from(cluster_name, src_ip)
    client = SSHClient.load(tgt_ip)
    tgt_id = System.get_my_machine_id(client)
    port_range = System.read_remote_config(client, 'ovs.ports.arakoon')
    free_ports = System.get_free_ports(port_range, exclude_ports, 2, client)
    ai.create_dir_structure(client)
    ai.add_node_to_config(tgt_id, tgt_ip, free_ports[0], free_ports[1])
    ai.upload_config_for(cluster_name)
    return {'client_port': free_ports[0], 'messaging_port': free_ports[1]}

def update_storagedrivers(storagedriver_guids, storagerouters, parameters):
    """
    Add/remove multiple vPools
    @param storagedriver_guids: Storage Drivers to be removed
    @param storagerouters: StorageRouters on which to add a new link
    @param parameters: Settings for new links
    """
    success = True
    # Add Storage Drivers
    for storagerouter_ip, storageappliance_machineid in storagerouters:
        try:
            new_parameters = copy.copy(parameters)
            new_parameters['storagerouter_ip'] = storagerouter_ip
            local_machineid = System.get_my_machine_id()
            if local_machineid == storageappliance_machineid:
                # Inline execution, since it's on the same node (preventing deadlocks)
                StorageRouterController.add_vpool(new_parameters)
            else:
                # Async execution, since it has to be executed on another node
                # @TODO: Will break in Celery 3.2, need to find another solution
                # Requirements:
                # - This code cannot continue until this new task is completed (as all these Storage Routers
                #   need to be handled sequentially)
                # - The wait() or get() methods are not allowed anymore from within a task to prevent deadlocks
                result = StorageRouterController.add_vpool.s(new_parameters).apply_async(routing_key='sr.{0}'.format(storageappliance_machineid))
                result.wait()
        except:
            success = False
    # Remove Storage Drivers
    for storagedriver_guid in storagedriver_guids:
        try:
            storagedriver = StorageDriver(storagedriver_guid)
            storagerouter_machineid = storagedriver.storagerouter.machine_id
            local_machineid = System.get_my_machine_id()
            if local_machineid == storagerouter_machineid:
                # Inline execution, since it's on the same node (preventing deadlocks)
                StorageRouterController.remove_storagedriver(storagedriver_guid)
            else:
                # Async execution, since it has to be executed on another node
                # @TODO: Will break in Celery 3.2, need to find another solution
                # Requirements:
                # - This code cannot continue until this new task is completed (as all these VSAs need to be
                #   handled sequentially)
                # - The wait() or get() methods are not allowed anymore from within a task to prevent deadlocks
                result = StorageRouterController.remove_storagedriver.s(storagedriver_guid).apply_async(routing_key='sr.{0}'.format(storagerouter_machineid))
                result.wait()
        except:
            success = False
    return success

def create(self):
    """
    Prepares a new Storagedriver for a given vPool and Storagerouter
    :return: None
    :rtype: NoneType
    """
    if self.sr_installer is None:
        raise RuntimeError('No StorageRouterInstaller instance found')

    machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
    storagerouter = self.sr_installer.storagerouter
    with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
        model_ports_in_use = []
        for sd in StorageDriverList.get_storagedrivers():
            if sd.storagerouter_guid == storagerouter.guid:
                model_ports_in_use += sd.ports.values()
                for proxy in sd.alba_proxies:
                    model_ports_in_use.append(proxy.service.ports[0])
        ports = System.get_free_ports(selected_range=port_range, exclude=model_ports_in_use, amount=4 + self.sr_installer.requested_proxies, client=self.sr_installer.root_client)

        vpool = self.vp_installer.vpool
        vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
        storagedriver = StorageDriver()
        storagedriver.name = vrouter_id.replace('_', ' ')
        storagedriver.ports = {'management': ports[0], 'xmlrpc': ports[1], 'dtl': ports[2], 'edge': ports[3]}
        storagedriver.vpool = vpool
        storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
        storagedriver.storage_ip = self.storage_ip
        storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
        storagedriver.description = storagedriver.name
        storagedriver.storagerouter = storagerouter
        storagedriver.storagedriver_id = vrouter_id
        storagedriver.save()

        # ALBA Proxies
        proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
        for proxy_id in xrange(self.sr_installer.requested_proxies):
            service = Service()
            service.storagerouter = storagerouter
            service.ports = [ports[4 + proxy_id]]
            service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
            service.type = proxy_service_type
            service.save()
            alba_proxy = AlbaProxy()
            alba_proxy.service = service
            alba_proxy.storagedriver = storagedriver
            alba_proxy.save()

    self.storagedriver = storagedriver

def get_local_storagerouter():
    """
    Fetches the details of a local storagerouter
    :return: a StorageRouter
    :rtype: ovs.dal.hybrids.storagerouter.StorageRouter
    """
    return System.get_my_storagerouter()

def process_IN_MOVED_TO(self, event):
    try:
        self._logger.debug('path: {0} - name: {1} - moved to'.format(event.path, event.name))
        if self._is_run_watcher(event.path):
            self.invalidate_vmachine_status(event.name)
            return
        vpool_path = '/mnt/' + self.get_vpool_for_vm(event.pathname)
        if vpool_path == '/mnt/':
            self._logger.warning('Vmachine not on vpool or invalid xml format for {0}'.format(event.pathname))
        if os.path.exists(vpool_path):
            machine_id = System.get_my_machine_id()
            target_path = vpool_path + '/' + machine_id + '/'
            target_xml = target_path + event.name
            if not os.path.exists(target_path):
                os.mkdir(target_path)
            shutil.copy2(event.pathname, target_xml)
    except Exception as exception:
        self._logger.error('Exception during process_IN_MOVED_TO: {0}'.format(str(exception)), print_msg=True)

def tick(self):
    """
    Runs one iteration of the scheduler. This is guarded with a distributed lock
    """
    self._logger.debug('DS executing tick')
    try:
        self._has_lock = False
        with self._mutex:
            # noinspection PyProtectedMember
            node_now = current_app._get_current_object().now()
            node_timestamp = time.mktime(node_now.timetuple())
            node_name = System.get_my_machine_id()
            try:
                lock = self._persistent.get(self._lock_name)
            except KeyNotFoundException:
                lock = None
            if lock is None:
                # There is no lock yet, so the lock is acquired
                self._has_lock = True
                self._logger.debug('DS there was no lock in tick')
            else:
                if lock['name'] == node_name:
                    # The current node holds the lock
                    self._logger.debug('DS keeps own lock')
                    self._has_lock = True
                elif node_timestamp - lock['timestamp'] > DistributedScheduler.TIMEOUT:
                    # The current lock is timed out, so the lock is stolen
                    self._logger.debug('DS last lock refresh is {0}s old'.format(node_timestamp - lock['timestamp']))
                    self._logger.debug('DS stealing lock from {0}'.format(lock['name']))
                    self._load_schedule()
                    self._has_lock = True
                else:
                    self._logger.debug('DS lock is not ours')
            if self._has_lock is True:
                lock = {'name': node_name, 'timestamp': node_timestamp}
                self._logger.debug('DS refreshing lock')
                self._persistent.set(self._lock_name, lock)
        if self._has_lock is True:
            self._logger.debug('DS executing tick workload')
            remaining_times = []
            try:
                for entry in self.schedule.itervalues():
                    next_time_to_run = self.maybe_due(entry, self.publisher)
                    if next_time_to_run:
                        remaining_times.append(next_time_to_run)
            except RuntimeError:
                pass
            self._logger.debug('DS executing tick workload - done')
            return min(remaining_times + [self.max_interval])
        else:
            return self.max_interval
    except Exception as ex:
        self._logger.debug('DS got error during tick: {0}'.format(ex))
        return self.max_interval

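# Both tick() implementations in this section decide lock ownership from a stored {'name', 'timestamp'}
# record: keep the lock if this node already owns it, steal it if the last refresh is older than a timeout,
# back off otherwise. Below is a minimal stand-alone sketch of that decision only; the function name and the
# timeout value are illustrative assumptions, not the OVS DistributedScheduler itself.
import time

LOCK_TIMEOUT = 60  # seconds; illustrative value

def should_hold_lock(lock, node_name, now=None):
    """Return True when this node should acquire, keep, or steal the lock record."""
    now = time.time() if now is None else now
    if lock is None:
        return True                                      # no lock yet: acquire it
    if lock['name'] == node_name:
        return True                                      # we already hold it: keep refreshing
    return now - lock['timestamp'] > LOCK_TIMEOUT        # stale lock of another node: steal it

# Example: a lock last refreshed 120s ago by another node is stolen with a 60s timeout.
assert should_hold_lock({'name': 'other', 'timestamp': 0}, 'me', now=120) is True
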
def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
    """
    Removes a node from a cluster, the old node will become a slave
    :param cluster_name: The name of the cluster to shrink
    :param deleted_node_ip: The ip of the node that should be deleted
    :param remaining_node_ip: The ip of a remaining node
    """
    logger.debug('Shrinking cluster "{0}" from {1}'.format(cluster_name, deleted_node_ip))
    current_client = SSHClient(remaining_node_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, current_client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting shrink'.format(cluster_name))
    old_client = SSHClient(deleted_node_ip, username='******')
    node_name = System.get_my_machine_id(old_client)
    node_id = None
    for item in current_client.run('etcdctl member list').splitlines():
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        if info['name'] == node_name:
            node_id = info['id']
    if node_id is None:
        raise RuntimeError('Could not locate {0} in the cluster'.format(deleted_node_ip))
    current_client.run('etcdctl member remove {0}'.format(node_id))
    EtcdInstaller.deploy_to_slave(remaining_node_ip, deleted_node_ip, cluster_name)
    EtcdInstaller.wait_for_cluster(cluster_name, current_client)
    logger.debug('Shrinking cluster "{0}" from {1} completed'.format(cluster_name, deleted_node_ip))

def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    machine_id = System.get_my_machine_id()
    key = '/ovs/framework/hosts/{0}/versions'.format(machine_id)
    data = Configuration.get(key) if Configuration.exists(key) else {}

    migrators = []
    path = '/'.join([os.path.dirname(__file__), 'migration'])
    for filename in os.listdir(path):
        if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'):
            name = filename.replace('.py', '')
            module = imp.load_source(name, '/'.join([path, filename]))
            for member in inspect.getmembers(module):
                if inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]:
                    migrators.append((member[1].identifier, member[1].migrate))

    end_version = 0
    for identifier, method in migrators:
        base_version = data[identifier] if identifier in data else 0
        version = method(base_version, master_ips, extra_ips)
        if version > end_version:
            end_version = version
        data[identifier] = end_version

    Configuration.set(key, data)

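# migrate() above loads every module in the sibling 'migration' package and calls <class>.migrate(base_version,
# master_ips, extra_ips) for each class that exposes an 'identifier' and derives directly from object, persisting
# the highest reached version per host. Below is a hypothetical migrator that such a loop would pick up; the
# class name, identifier value, and version step are illustrative, not a real OVS migrator.
class DemoMigrator(object):
    identifier = 'demo'  # key under which the reached version is stored per host

    @staticmethod
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """Run every step newer than `previous_version` and return the version now in place."""
        working_version = previous_version
        if working_version < 1:
            # ... perform the actual migration work for version 1 here ...
            working_version = 1
        return working_version
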
def wrapped(*args, **kwargs):
    if lock_type == 'local':
        _mutex = file_mutex(key)
    elif lock_type == 'cluster':
        _mutex = volatile_mutex(key)
    else:
        raise ValueError('Lock type {0} is not supported!'.format(lock_type))
    try:
        _mutex.acquire(wait=0.005)
        local_sr = System.get_my_storagerouter()
        CacheHelper.set(key=key, item={'ip': local_sr.ip, 'hostname': local_sr.name}, expire_time=60)
        return func(*args, **kwargs)
    except (NoFileLockAvailableException, NoVolatileLockAvailableException):
        if callback is None:
            return
        else:
            executor_info = None
            start = time.time()
            while executor_info is None:
                # Calculated guesswork. If a callback function is expected, the acquire has happened for another
                # executor, so the volatile key should be set eventually. However, because it is set after the
                # acquire, the callback executor and the original method executor can race between fetch and set.
                # A better implementation would rely on the fwk ensure_single decorator as it checks for various
                # races itself. This is just a poor man's, temporary implementation.
                if time.time() - start > 5:
                    raise ValueError('Timed out after 5 seconds while fetching the information about the executor.')
                try:
                    executor_info = CacheHelper.get(key=key)
                except:
                    pass
            callback_func = callback.__func__ if isinstance(callback, staticmethod) else callback
            argnames = inspect.getargspec(callback_func)[0]
            arguments = list(args)
            kwargs.update({'test_name': func.__name__})
            if executor_info is not None:
                kwargs.update(executor_info)
                if 'result_handler' in argnames:
                    result_handler = kwargs.get('result_handler')
                    for index, arg in enumerate(arguments):
                        if isinstance(arg, HCResults.HCResultCollector):
                            result_handler = arguments.pop(index)
                            break
                    if result_handler is None:
                        raise TypeError('Expected an instance of {0}'.format(HCResults.HCResultCollector))
                    kwargs['result_handler'] = result_handler
            return callback_func(*tuple(arguments), **kwargs)
    finally:
        _mutex.release()

def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
    """
    Extends a cluster to a given new node
    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :return: Ports used by arakoon cluster
    :rtype: dict
    """
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    base_dir = base_dir.rstrip('/')
    config = ArakoonClusterConfig(cluster_name)
    config.load_config()
    client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, {log_dir: True, home_dir: False, tlog_dir: False})
    port_mutex = None
    try:
        if locked is True:
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        if node_name not in [node.name for node in config.nodes]:
            config.nodes.append(ArakoonNodeConfig(name=node_name, ip=new_ip, client_port=ports[0], messaging_port=ports[1],
                                                  log_dir=log_dir, home=home_dir, tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        if port_mutex is not None:
            port_mutex.release()
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def pulse():
    """
    Update the heartbeats for the Current Routers
    :return: None
    """
    logger = Logger('extensions-generic')
    machine_id = System.get_my_machine_id()
    current_time = int(time.time())
    routers = StorageRouterList.get_storagerouters()
    for node in routers:
        if node.machine_id == machine_id:
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                node_save = StorageRouter(node.guid)
                node_save.heartbeats['process'] = current_time
                node_save.save()
            StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
        else:
            try:
                # check timeout of other nodes and clear arp cache
                if node.heartbeats and 'process' in node.heartbeats:
                    if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                        check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')

def __init__(self):
    """
    Initializes the client
    """
    # Safe calls
    self._node_id = System.get_my_machine_id().replace(r"'", r"'\''")
    # Alba is currently always installed but the Alba version/package info is located in the SDM section
    self._package_manager = PackageFactory.get_manager()
    self._service_manager = ServiceFactory.get_manager()
    self._service_type = ServiceFactory.get_service_type()
    if self._service_type != 'systemd':
        raise NotImplementedError('Only Systemd is supported')

    # Potential failing calls
    self._cluster_id = self.get_config_key(self.LOCATION_CLUSTER_ID, fallback=[CONFIG_STORE_LOCATION, 'cluster_id'])
    self.interval = self.get_config_key(self.LOCATION_INTERVAL, fallback=[self.FALLBACK_CONFIG, self.KEY_INTERVAL], default=self.DEFAULT_INTERVAL)
    self._openvpn_service_name = 'openvpn@ovs_{0}-{1}'.format(self._cluster_id, self._node_id)

    # Calls to look out for. These could still be None when using them
    self._storagerouter = None
    self._client = None
    self._set_storagerouter()
    self._set_client()

    # Safe call, start caching
    self.caching = SupportAgentCache(self)

def check_dtl(result_handler):
    """
    Checks the dtl for all vdisks on the local node
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :return: None
    :rtype: NoneType
    """
    # Fetch vdisks hosted on this machine
    local_sr = System.get_my_storagerouter()
    if len(local_sr.vdisks_guids) == 0:
        return result_handler.skip('No VDisks present in cluster.')
    for vdisk_guid in local_sr.vdisks_guids:
        vdisk = VDisk(vdisk_guid)
        vdisk.invalidate_dynamics(['dtl_status', 'info'])
        if vdisk.dtl_status == 'ok_standalone' or vdisk.dtl_status == 'disabled':
            result_handler.success('VDisk {0}s DTL is disabled'.format(vdisk.name), code=ErrorCodes.volume_dtl_standalone)
        elif vdisk.dtl_status == 'ok_sync':
            result_handler.success('VDisk {0}s DTL is enabled and running.'.format(vdisk.name), code=ErrorCodes.volume_dtl_ok)
        elif vdisk.dtl_status == 'degraded':
            result_handler.warning('VDisk {0}s DTL is degraded.'.format(vdisk.name), code=ErrorCodes.volume_dtl_degraded)
        elif vdisk.dtl_status == 'checkup_required':
            result_handler.warning('VDisk {0}s DTL should be configured.'.format(vdisk.name), code=ErrorCodes.volume_dtl_checkup_required)
        elif vdisk.dtl_status == 'catch_up':
            result_handler.warning('VDisk {0}s DTL is enabled but still syncing.'.format(vdisk.name), code=ErrorCodes.volume_dtl_catch_up)
        else:
            result_handler.warning('VDisk {0}s DTL has an unknown status: {1}.'.format(vdisk.name, vdisk.dtl_status), code=ErrorCodes.volume_dtl_unknown)

def pulse():
    """
    Update the heartbeats for the Current Routers
    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    machine_id = System.get_my_machine_id()
    current_time = int(time.time())
    routers = StorageRouterList.get_storagerouters()
    for node in routers:
        if node.machine_id == machine_id:
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                node_save = StorageRouter(node.guid)
                node_save.heartbeats['process'] = current_time
                node_save.save()
            StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
        else:
            try:
                # check timeout of other nodes and clear arp cache
                if node.heartbeats and 'process' in node.heartbeats:
                    if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                        check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')

def add_services(client, node_type, logger):
    """
    Add the services required by the OVS cluster
    :param client: Client on which to add the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages='Adding services')
    services = {}
    worker_queue = System.get_my_machine_id(client=client)
    if node_type == 'master':
        worker_queue += ',ovs_masters'
        services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                         'scheduled-tasks': {},
                         'webapp-api': {},
                         'volumerouter-consumer': {}})
    services.update({'workers': {'WORKER_QUEUE': worker_queue},
                     'watcher-framework': {}})
    for service_name, params in services.iteritems():
        if not ServiceManager.has_service(service_name, client):
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
            ServiceManager.add_service(name=service_name, params=params, client=client)

def pulse():
    """
    Update the heartbeats for all Storage Routers
    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    current_time = int(time.time())
    machine_id = System.get_my_machine_id()
    amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|user'),
                                        EtcdConfiguration.get('/ovs/framework/messagequeue|password'),
                                        EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id)))
    celery_path = OSManager.get_path('celery')
    worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True)
    routers = StorageRouterList.get_storagerouters()
    for node in routers:
        if node.heartbeats is None:
            node.heartbeats = {}
        if 'celery@{0}: OK'.format(node.name) in worker_states:
            node.heartbeats['celery'] = current_time
        if node.machine_id == machine_id:
            node.heartbeats['process'] = current_time
        else:
            try:
                # check timeout of other nodes and clear arp cache
                if node.heartbeats and 'process' in node.heartbeats:
                    if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                        check_output("/usr/sbin/arp -d {0}".format(node.name), shell=True)
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
        node.save()

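# The pulse() variants above all apply the same staleness rule: a remote node whose 'process' heartbeat is
# older than ARP_TIMEOUT gets its ARP entry flushed. A tiny self-contained sketch of that check follows; the
# function name and timeout value are illustrative, not part of the OVS HeartBeat class.
import time

ARP_TIMEOUT = 300  # seconds; illustrative value

def heartbeat_expired(heartbeats, now=None):
    """Return True when the 'process' heartbeat exists and is older than ARP_TIMEOUT."""
    now = int(time.time()) if now is None else now
    return bool(heartbeats) and 'process' in heartbeats and now - heartbeats['process'] >= ARP_TIMEOUT

# Example: a heartbeat written 400s ago is considered expired with a 300s timeout.
assert heartbeat_expired({'process': 0}, now=400) is True
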
def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': OVSPluginTestCase.CINDER_USER,
                                   'password': OVSPluginTestCase.CINDER_PASS,
                                   'ip': OVSPluginTestCase.CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    parameters = {'storagerouter_ip': OVSPluginTestCase.ip,
                  'vpool_name': OVSPluginTestCase.VPOOL_NAME,
                  'type': 'local',
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': OVSPluginTestCase.VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': OVSPluginTestCase.ip,
                  'connection_port': OVSPluginTestCase.VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {},
                  'readcache_size': 50,
                  'writecache_size': 50}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool {0} created'.format(OVSPluginTestCase.VPOOL_NAME))
            try:
                os.listdir(OVSPluginTestCase.VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet {0}'.format(str(ex)))
                pass
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool {0} was not modeled correctly or did not start.'.format(OVSPluginTestCase.VPOOL_NAME))

def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    rmq_config = RawConfigParser()
    rmq_config.read(os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg'))
    machine_id = System.get_my_machine_id()
    return rmq_config.has_section(machine_id)

def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
    ai = ArakoonInstaller()
    ai.load_config_from(cluster_name, remaining_node_ip)
    client = SSHClient.load(deleted_node_ip)
    deleted_node_id = System.get_my_machine_id(client)
    ai.delete_dir_structure(client)
    ai.remove_node_from_config(deleted_node_id)
    ai.upload_config_for(cluster_name)

def invalidate_vmachine_status(self, name):
    if not name.endswith('.xml'):
        return
    devicename = '{0}/{1}'.format(System.get_my_machine_id(), name)
    vm = VMachineList().get_by_devicename_and_vpool(devicename, None)
    if vm:
        vm.invalidate_dynamics()
        logger.debug('Hypervisor status invalidated for: {0}'.format(name))

def _get_free_ports(client):
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
        for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
            config = ArakoonClusterConfig(cluster_name, False)
            config.load_config()
            for node in config.nodes:
                if node.name == node_name:
                    clusters.append(cluster_name)
                    exclude_ports.append(node.client_port)
                    exclude_ports.append(node.messaging_port)
    ports = System.get_free_ports(Configuration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)), exclude_ports, 2, client)
    ArakoonInstaller._logger.debug('  Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports

def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
    """
    Extends a cluster to a given new node
    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :return: Ports used by arakoon cluster
    :rtype: dict
    """
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    base_dir = base_dir.rstrip('/')
    config = ArakoonClusterConfig(cluster_name)
    config.load_config()
    client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [home_dir, tlog_dir])
    port_mutex = None
    try:
        if locked is True:
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        if node_name not in [node.name for node in config.nodes]:
            config.nodes.append(ArakoonNodeConfig(name=node_name, ip=new_ip, client_port=ports[0], messaging_port=ports[1],
                                                  log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                  crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                  home=home_dir, tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        if port_mutex is not None:
            port_mutex.release()
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def extend_cluster(master_ip, new_ip, cluster_name):
    """
    Extends a cluster to a given new node
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    client = SSHClient(master_ip, username='******')
    if not EtcdInstaller._is_healty(cluster_name, client):
        raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))
    cluster_members = client.run('etcdctl member list').splitlines()
    for cluster_member in cluster_members:
        if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
            logger.info('Node {0} already member of etcd cluster'.format(new_ip))
            return
    current_cluster = []
    for item in client.run('etcdctl member list').splitlines():
        info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
        current_cluster.append('{0}={1}'.format(info['name'], info['peer']))
    client = SSHClient(new_ip, username='******')
    node_name = System.get_my_machine_id(client)
    current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)
    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': ','.join(current_cluster),
                                       'INITIAL_STATE': 'existing',
                                       'INITIAL_PEERS': ''},
                               target_name=target_name)
    master_client = SSHClient(master_ip, username='******')
    master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)
    logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))

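# The etcd shrink/extend helpers above map node names to member ids by matching each line of
# `etcdctl member list` against EtcdInstaller.MEMBER_REGEX. Below is a self-contained sketch of that parsing
# with an assumed pattern; the real MEMBER_REGEX lives on EtcdInstaller and may differ, and find_member_id is
# an illustrative name only.
import re

MEMBER_REGEX = re.compile(r'^(?P<id>[^:]+): name=(?P<name>\S+) peerURLs=(?P<peer>\S+) clientURLs=(?P<client>\S+)')

def find_member_id(member_list_output, node_name):
    """Return the etcd member id for `node_name`, or None when it is not part of the cluster."""
    for line in member_list_output.splitlines():
        match = MEMBER_REGEX.search(line)
        if match is not None and match.groupdict()['name'] == node_name:
            return match.groupdict()['id']
    return None
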
def create_config(self, cluster_name, ip, client_port, messaging_port, plugins=None):
    """
    Creates initial config object causing this host to be master
    :param cluster_name: unique name for this arakoon cluster used in paths
    :param ip: ip on which service should listen
    :param client_port: port on which arakoon clients connect
    :param messaging_port: port used for internal node-to-node messaging
    :param plugins: optional arakoon plugins
    :return:
    """
    client = SSHClient.load(ip)
    node_name = System.get_my_machine_id(client)
    base_dir = System.read_remote_config(client, 'ovs.core.db.arakoon.location')
    self.clear_config()
    self.config = ClusterConfig(base_dir, cluster_name, 'info', plugins)
    self.config.nodes.append(ClusterNode(node_name, ip, client_port, messaging_port))
    self.config.target_ip = ip

def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(System.get_my_machine_id()))
    for endpoint in EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints'):
        if endpoint.startswith(my_ip):
            return True
    return False

def create_cluster(cluster_name, ip):
    """
    Creates a cluster
    :param ip: IP address of the first node of the new cluster
    :param cluster_name: Name of the cluster
    """
    logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))
    client = SSHClient(ip, username='******')
    node_name = System.get_my_machine_id(client)
    data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
    abs_paths = [data_dir, wal_dir]
    client.dir_delete(abs_paths)
    client.dir_create(abs_paths)
    client.dir_chmod(abs_paths, 0755, recursive=True)
    client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)
    base_name = 'ovs-etcd'
    target_name = 'ovs-etcd-{0}'.format(cluster_name)
    ServiceManager.add_service(base_name, client,
                               params={'CLUSTER': cluster_name,
                                       'NODE_ID': node_name,
                                       'DATA_DIR': data_dir,
                                       'WAL_DIR': wal_dir,
                                       'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip),
                                       'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip),
                                       'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                       'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip)),
                                       'INITIAL_STATE': 'new',
                                       'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip))},
                               target_name=target_name)
    EtcdInstaller.start(cluster_name, client)
    EtcdInstaller.wait_for_cluster(cluster_name, client)
    logger.debug('Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))

def start(self, daemon=True):
    """
    start all nodes in the cluster
    """
    from ovs.extensions.db.arakoon.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark
    CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
    node_name = System.get_my_machine_id()
    self._start_one_ex(node_name, daemon)

def _gatherlocalnodes(self, cluster):
    """
    gather all localnodes for all clusters
    """
    localnodes = [System.get_my_machine_id()]  # cluster.listLocalNodes()
    CheckArakoonTlogMark._speak('Found local nodes {0}'.format(localnodes))
    for localnode in localnodes:
        self._localnodesfiles[localnode] = dict()
        self._localnodesfiles[localnode]['cluster'] = cluster

def start(self, daemon=True):
    """
    start all nodes in the cluster
    """
    from ovs.extensions.db.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark
    CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
    node_name = System.get_my_machine_id()
    self._start_one_ex(node_name, daemon)

def tick(self):
    """
    Runs one iteration of the scheduler. This is guarded with a distributed lock
    """
    self._has_lock = False
    try:
        logger.debug('DS executing tick')
        self._mutex.acquire(wait=10)
        node_now = current_app._get_current_object().now()
        node_timestamp = time.mktime(node_now.timetuple())
        node_name = System.get_my_machine_id()
        try:
            lock = self._persistent.get('{0}_lock'.format(self._namespace))
        except KeyNotFoundException:
            lock = None
        if lock is None:
            # There is no lock yet, so the lock is acquired
            self._has_lock = True
            logger.debug('DS there was no lock in tick')
        else:
            if lock['name'] == node_name:
                # The current node holds the lock
                logger.debug('DS keeps own lock')
                self._has_lock = True
            elif node_timestamp - lock['timestamp'] > DistributedScheduler.TIMEOUT:
                # The current lock is timed out, so the lock is stolen
                logger.debug('DS last lock refresh is {0}s old'.format(node_timestamp - lock['timestamp']))
                logger.debug('DS stealing lock from {0}'.format(lock['name']))
                self._load_schedule()
                self._has_lock = True
            else:
                logger.debug('DS lock is not ours')
        if self._has_lock is True:
            lock = {'name': node_name, 'timestamp': node_timestamp}
            logger.debug('DS refreshing lock')
            self._persistent.set('{0}_lock'.format(self._namespace), lock)
    finally:
        self._mutex.release()
    if self._has_lock is True:
        logger.debug('DS executing tick workload')
        remaining_times = []
        try:
            for entry in self.schedule.itervalues():
                next_time_to_run = self.maybe_due(entry, self.publisher)
                if next_time_to_run:
                    remaining_times.append(next_time_to_run)
        except RuntimeError:
            pass
        logger.debug('DS executing tick workload - done')
        return min(remaining_times + [self.max_interval])
    else:
        return self.max_interval

def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                   'description': 'test',
                                   'username': CINDER_USER,
                                   'password': CINDER_PASS,
                                   'ip': CINDER_CONTROLLER,
                                   'port': 80,
                                   'type': 'OPENSTACK',
                                   'metadata': {'integratemgmt': True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug('Creating vpool')
    backend_type = 'local'
    fields = ['storage_ip', 'vrouter_port']
    parameters = {'storagerouter_ip': IP,
                  'vpool_name': VPOOL_NAME,
                  'type': 'local',
                  'mountpoint_bfs': VPOOL_BFS,
                  'mountpoint_temp': VPOOL_TEMP,
                  'mountpoint_md': VPOOL_MD,
                  'mountpoint_readcaches': [VPOOL_READCACHE],
                  'mountpoint_writecaches': [VPOOL_WRITECACHE],
                  'mountpoint_foc': VPOOL_FOC,
                  'storage_ip': '127.0.0.1',  # KVM
                  'vrouter_port': VPOOL_PORT,
                  'integrate_vpool': True,
                  'connection_host': IP,
                  'connection_port': VPOOL_PORT,
                  'connection_username': '',
                  'connection_password': '',
                  'connection_backend': {}}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug('vpool %s created' % VPOOL_NAME)
            try:
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug('vpool not ready yet %s' % (str(ex)))
                pass
        attempt += 1
        time.sleep(2)
    raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)

def _get_test_name():
    """
    Retrieve a structured environment test name
    :returns: a structured environment based test name
    :rtype: str
    """
    number_of_nodes = len(StoragerouterHelper.get_storagerouters())
    split_ip = System.get_my_storagerouter().ip.split('.')
    return str(number_of_nodes) + 'N-' + split_ip[2] + '.' + split_ip[3]

def update_components(components):
    """
    Initiate the update through commandline for all StorageRouters
    This is called upon by the API
    :return: None
    """
    components = [component.strip() for component in components]
    root_client = SSHClient(endpoint=System.get_my_storagerouter(), username='******')
    root_client.run(['ovs', 'update', ','.join(components)])

def _create_vpool(self):
    """
    Needed to actually run tests on
    This is not actually a test of "Add Vpool to OVS",
    so any failure here will be reported as a setUp error and no tests will run
    """
    pmachine = System.get_my_storagerouter().pmachine
    mgmt_center = MgmtCenter(data={"name": "Openstack",
                                   "description": "test",
                                   "username": CINDER_USER,
                                   "password": CINDER_PASS,
                                   "ip": CINDER_CONTROLLER,
                                   "port": 80,
                                   "type": "OPENSTACK",
                                   "metadata": {"integratemgmt": True}})
    mgmt_center.save()
    pmachine.mgmtcenter = mgmt_center
    pmachine.save()
    self._debug("Creating vpool")
    parameters = {"storagerouter_ip": IP,
                  "vpool_name": VPOOL_NAME,
                  "type": "local",
                  "storage_ip": "127.0.0.1",  # KVM
                  "vrouter_port": VPOOL_PORT,
                  "integrate_vpool": True,
                  "connection_host": IP,
                  "connection_port": VPOOL_PORT,
                  "connection_username": "",
                  "connection_password": "",
                  "connection_backend": {},
                  "readcache_size": 50,
                  "writecache_size": 50}
    StorageRouterController.add_vpool(parameters)
    attempt = 0
    while attempt < 10:
        vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
        if vpool is not None:
            self._debug("vpool {0} created".format(VPOOL_NAME))
            try:
                os.listdir(VPOOL_MOUNTPOINT)
                return vpool
            except Exception as ex:
                # either it doesn't exist, or we don't have permission
                self._debug("vpool not ready yet {0}".format(str(ex)))
                pass
        attempt += 1
        time.sleep(2)
    raise RuntimeError("Vpool {0} was not modeled correctly or did not start.".format(VPOOL_NAME))

def create_cluster(cluster_name, ip, base_dir, plugins=None, locked=True):
    """
    Creates a cluster
    :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
    :param plugins: Plugins that should be added to the configuration file
    :param base_dir: Base directory that should contain the data and tlogs
    :param ip: IP address of the first node of the new cluster
    :param cluster_name: Name of the cluster
    """
    logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
    base_dir = base_dir.rstrip('/')
    client = SSHClient(ip)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(ip, home_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(ip, log_dir, ArakoonInstaller.ARAKOON_LOG_DIR.format(''), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(ip, tlog_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    port_mutex = None
    try:
        if locked is True:
            from ovs.extensions.generic.volatilemutex import VolatileMutex
            port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(ip))
            port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        config = ArakoonClusterConfig(cluster_name, plugins)
        config.nodes.append(ArakoonNodeConfig(name=node_name, ip=ip, client_port=ports[0], messaging_port=ports[1],
                                              log_dir=log_dir, home=home_dir, tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        if port_mutex is not None:
            port_mutex.release()
    logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def extend_cluster(master_ip, new_ip, cluster_name, base_dir):
    """
    Extends a cluster to a given new node
    :param base_dir: Base directory that will hold the db and tlogs
    :param cluster_name: Name of the cluster to be extended
    :param new_ip: IP address of the node to be added
    :param master_ip: IP of one of the already existing nodes
    """
    logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    base_dir = base_dir.rstrip('/')
    from ovs.extensions.generic.volatilemutex import VolatileMutex
    port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(new_ip))
    config = ArakoonClusterConfig(cluster_name)
    config.load_config()
    client = SSHClient(new_ip)
    node_name = System.get_my_machine_id(client)
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, home_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, log_dir, ArakoonInstaller.ARAKOON_LOG_DIR.format(''), cluster_name)
    ArakoonInstaller.archive_existing_arakoon_data(new_ip, tlog_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
    try:
        port_mutex.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(client)
        if node_name not in [node.name for node in config.nodes]:
            config.nodes.append(ArakoonNodeConfig(name=node_name, ip=new_ip, client_port=ports[0], messaging_port=ports[1],
                                                  log_dir=log_dir, home=home_dir, tlog_dir=tlog_dir))
        ArakoonInstaller._deploy(config)
    finally:
        port_mutex.release()
    logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    return {'client_port': ports[0], 'messaging_port': ports[1]}

def _process_task(task, metadata, servicemanager):
    """
    Processes a task
    """
    try:
        SupportAgent._logger.debug('Processing: {0}'.format(task))
        cid = Configuration.get('/ovs/framework/cluster_id').replace(r"'", r"'\''")
        nid = System.get_my_machine_id().replace(r"'", r"'\''")
        if task == 'OPEN_TUNNEL':
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output("systemctl stop 'openvpn@ovs_{0}-{1}' || true".format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
            for filename, contents in metadata['files'].iteritems():
                with open(filename, 'w') as the_file:
                    the_file.write(base64.b64decode(contents))
            if servicemanager == 'upstart':
                check_output('service openvpn start', shell=True)
            else:
                check_output("systemctl start 'openvpn@ovs_{0}-{1}'".format(cid, nid), shell=True)
        elif task == 'CLOSE_TUNNEL':
            if servicemanager == 'upstart':
                check_output('service openvpn stop', shell=True)
            else:
                check_output("systemctl stop 'openvpn@ovs_{0}-{1}'".format(cid, nid), shell=True)
            check_output('rm -f /etc/openvpn/ovs_*', shell=True)
        elif task == 'UPLOAD_LOGFILES':
            logfile = check_output('ovs collect logs', shell=True).strip()
            check_output("mv '{0}' '/tmp/{1}'; curl -T '/tmp/{1}' 'ftp://{2}' --user '{3}:{4}'; rm -f '{0}' '/tmp/{1}'".format(logfile.replace(r"'", r"'\''"),
                                                                                                                               metadata['filename'].replace(r"'", r"'\''"),
                                                                                                                               metadata['endpoint'].replace(r"'", r"'\''"),
                                                                                                                               metadata['user'].replace(r"'", r"'\''"),
                                                                                                                               metadata['password'].replace(r"'", r"'\''")), shell=True)
        else:
            raise RuntimeError('Unknown task')
    except Exception, ex:
        SupportAgent._logger.exception('Unexpected error while processing task {0} (data: {1}): {2}'.format(task, json.dumps(metadata), ex))
        raise

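# _process_task() and the support-agent __init__() above defend shell commands against embedded quotes with
# .replace(r"'", r"'\''"). A small stand-alone illustration of that single-quote escaping trick follows; the
# helper name is illustrative, and pipes.quote (Python 2) or shlex.quote (Python 3) would be the more idiomatic
# choice where available.
def shell_single_quote(value):
    """Wrap `value` in single quotes for a POSIX shell, escaping embedded single quotes."""
    return "'{0}'".format(value.replace("'", "'\\''"))

# Example: a value containing a quote stays a single shell word.
assert shell_single_quote("it's") == "'it'\\''s'"
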
def get_healthcheck_version():
    """
    Gets the installed healthcheck version
    :return: version number of the installed healthcheck
    :rtype: str
    """
    client = SSHClient(System.get_my_storagerouter())
    package_name = 'openvstorage-health-check'
    package_manager = PackageFactory.get_manager()
    packages = package_manager.get_installed_versions(client=client, package_names=[package_name])
    return packages.get(package_name, 'unknown')

def install_plugins():
    """
    (Re)load plugins
    """
    if ServiceManager.has_service('ovs-watcher-framework', SSHClient('127.0.0.1', username='******')):
        # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
        # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        clients = []
        try:
            for storagerouter in StorageRouterList.get_storagerouters():
                clients.append(SSHClient(storagerouter, username='******'))
        except UnableToConnectException:
            raise RuntimeError('Not all StorageRouters are reachable')
        for client in clients:
            for service_name in ['watcher-framework', 'memcached']:
                ServiceManager.stop_service(service_name, client=client)
                wait = 30
                while wait > 0:
                    if ServiceManager.get_service_status(service_name, client=client) is False:
                        break
                    time.sleep(1)
                    wait -= 1
                if wait == 0:
                    raise RuntimeError('Could not stop service: {0}'.format(service_name))
        for client in clients:
            for service_name in ['memcached', 'watcher-framework']:
                ServiceManager.start_service(service_name, client=client)
                wait = 30
                while wait > 0:
                    if ServiceManager.get_service_status(service_name, client=client) is True:
                        break
                    time.sleep(1)
                    wait -= 1
                if wait == 0:
                    raise RuntimeError('Could not start service: {0}'.format(service_name))
        from ovs.dal.helpers import Migration
        Migration.migrate()
        from ovs.lib.helpers.toolbox import Toolbox
        ip = System.get_my_storagerouter().ip
        functions = Toolbox.fetch_hooks('plugin', 'postinstall')
        for function in functions:
            function(ip=ip)

def __init__(self, path=None, client=None):
    """
    :param path: path of the fstab file
    :type path: str
    """
    if path:
        self._path = path
    else:
        self._path = self.DEFAULT_PATH
    if client is None:
        client = SSHClient(System.get_my_storagerouter(), username='******')
    self.client = client