def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True, filesystem=False, ports=None):
    """
    Add the node reachable at ``new_ip`` to an already existing arakoon cluster

    The cluster configuration is loaded through ``master_ip``. A node entry, named after the
    machine id of the new node, is appended when no node with that name is configured yet,
    after which the configuration is deployed.

    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
    :type filesystem: bool
    :param ports: A list of ports to be used for this cluster's node; free ports are looked up when omitted
    :type ports: list
    :return: Client port, messaging port and the IPs of all configured nodes
    :rtype: dict
    """
    log = ArakoonInstaller._logger
    log.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))

    root_dir = base_dir.rstrip('/')
    cluster_config = ArakoonClusterConfig(cluster_name, filesystem)
    cluster_config.load_config(master_ip)

    ssh_client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    machine_id = System.get_my_machine_id(ssh_client)
    arakoon_home = ArakoonInstaller.ARAKOON_HOME_DIR.format(root_dir, cluster_name)
    arakoon_tlogs = ArakoonInstaller.ARAKOON_TLOG_DIR.format(root_dir, cluster_name)
    # Make sure no data of an earlier deployment is left behind on the new node
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [arakoon_home, arakoon_tlogs])

    mutex = None
    try:
        if locked is True:
            # Imported lazily, only needed when running in a locked context
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            mutex.acquire(wait=60)
        if ports is None:
            ports = ArakoonInstaller._get_free_ports(ssh_client)
        configured_names = [node.name for node in cluster_config.nodes]
        if machine_id not in configured_names:
            new_node = ArakoonNodeConfig(name=machine_id,
                                         ip=new_ip,
                                         client_port=ports[0],
                                         messaging_port=ports[1],
                                         log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                         crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                         home=arakoon_home,
                                         tlog_dir=arakoon_tlogs)
            cluster_config.nodes.append(new_node)
        ArakoonInstaller._deploy(cluster_config, filesystem=filesystem)
    finally:
        if mutex is not None:
            mutex.release()

    log.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    node_ips = [node.ip for node in cluster_config.nodes]
    return {'client_port': ports[0],
            'messaging_port': ports[1],
            'ips': node_ips}
def worker_process_init_handler(args=None, kwargs=None, **kwds):
    """
    Hook for process init: resets the stores held by the volatile and persistent factories
    and requests the extensions logger.
    """
    _ = args, kwargs, kwds  # Hook arguments are not used
    for factory in (VolatileFactory, PersistentFactory):
        factory.store = None
    # Initiate extensions logger
    LogHandler.get('extensions', name='ovs_extensions')
def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
    """
    Extends an existing arakoon cluster with the node reachable at ``new_ip``

    A node entry named after the machine id of the new node is appended to the loaded
    cluster configuration when it is not present yet; the configuration is then deployed.

    :param master_ip: IP of one of the already existing nodes
    :type master_ip: str
    :param new_ip: IP address of the node to be added
    :type new_ip: str
    :param cluster_name: Name of the cluster to be extended
    :type cluster_name: str
    :param base_dir: Base directory that will hold the db and tlogs
    :type base_dir: str
    :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :return: Ports used by arakoon cluster
    :rtype: dict
    """
    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
    stripped_base = base_dir.rstrip('/')

    arakoon_config = ArakoonClusterConfig(cluster_name)
    arakoon_config.load_config()

    remote = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
    new_node_name = System.get_my_machine_id(remote)
    db_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(stripped_base, cluster_name)
    tlogs = ArakoonInstaller.ARAKOON_TLOG_DIR.format(stripped_base, cluster_name)
    ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [db_dir, tlogs])

    lock = None
    try:
        if locked is True:
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            lock = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
            lock.acquire(wait=60)
        ports = ArakoonInstaller._get_free_ports(remote)
        present = [node.name for node in arakoon_config.nodes]
        if new_node_name not in present:
            node_settings = ArakoonNodeConfig(name=new_node_name,
                                              ip=new_ip,
                                              client_port=ports[0],
                                              messaging_port=ports[1],
                                              log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                              crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                              home=db_dir,
                                              tlog_dir=tlogs)
            arakoon_config.nodes.append(node_settings)
        ArakoonInstaller._deploy(arakoon_config)
    finally:
        # Always give the port lock back, also when deploying failed
        if lock is not None:
            lock.release()

    ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    result = {'client_port': ports[0]}
    result['messaging_port'] = ports[1]
    return result
def authenticated(force=False):
    """
    Decorator to make that a login is executed in case the current session isn't valid anymore

    The decorated method is called once; when it fails with an authentication related error,
    ``self._login()`` is executed and the method is called a second time.

    :param force: Force a (re)login, as some methods also work when not logged in
    :type force: bool
    :return: Decorator to be applied on methods of an object exposing ``_login``
    """
    # Local import: keeps this block self-contained regardless of the file's import section
    from functools import wraps

    logger = LogHandler.get('extensions', name='vmware sdk')

    def wrapper(function):
        # Preserve name/docstring of the wrapped method on the wrapper itself, instead of
        # overwriting the instance's __doc__ on every single call
        @wraps(function)
        def new_function(self, *args, **kwargs):
            try:
                if force:
                    self._login()
                return function(self, *args, **kwargs)
            except WebFault as fault:
                # Only authentication failures are retried, any other fault is passed on
                if 'The session is not authenticated' not in str(fault):
                    raise
                logger.debug('Received WebFault authentication failure, logging in...')
            except NotAuthenticatedException:
                logger.debug('Received NotAuthenticatedException, logging in...')
            # Single retry after a fresh login; errors of the retry are not handled here
            self._login()
            return function(self, *args, **kwargs)

        return new_function

    return wrapper
def log_slow_calls(f):
    """
    Wrapper to print duration when call takes > 1s
    :param f: Function to wrap
    :return: Wrapped function (carrying the name and module of the wrapped function)
    """
    slow_logger = LogHandler.get('extensions', name='etcdconfiguration')

    def new_function(*args, **kwargs):
        """
        Execute function and log a warning afterwards when it took longer than a second
        :return: Function output
        """
        started = time.time()
        try:
            return f(*args, **kwargs)
        finally:
            # The key is taken from the 'key' keyword argument, or else the first positional argument
            if 'key' in kwargs:
                suffix = ' (key: {0})'.format(kwargs['key'])
            elif len(args) > 0:
                suffix = ' (key: {0})'.format(args[0])
            else:
                suffix = ''
            elapsed = time.time() - started
            if elapsed > 1:
                slow_logger.warning('Call to {0}{1} took {2}s'.format(f.__name__, suffix, elapsed))

    new_function.__name__ = f.__name__
    new_function.__module__ = f.__module__
    return new_function
def _clean_cache():
    """
    Celery startup script: removes entries from the ENSURE_SINGLE_KEY-prefixed keys in the
    persistent store whose task is not reported as active; keys without remaining values
    are deleted.
    """
    log = LogHandler.get('celery', name='celery')
    log.info('Executing celery "clear_cache" startup script...')

    from ovs.lib.helpers.decorators import ENSURE_SINGLE_KEY

    # Collect the ids of all tasks currently reported as active
    active_per_worker = inspect().active()
    active_ids = []
    if active_per_worker is not None:
        for worker_tasks in active_per_worker.itervalues():
            active_ids.extend(task['id'] for task in worker_tasks)

    store = PersistentFactory.get_client()
    for key in store.prefix(ENSURE_SINGLE_KEY):
        try:
            with volatile_mutex(name=key, wait=5):
                entry = store.get(key)
                surviving = []
                for value in entry.get('values', []):
                    task_id = value.get('task_id')
                    if task_id is None or task_id not in active_ids:
                        continue
                    surviving.append(value)
                if surviving:
                    entry['values'] = surviving
                    store.set(key, entry)
                    log.info('Updated key {0}'.format(key))
                else:
                    store.delete(key)
                    log.info('Deleted key {0}'.format(key))
        except KeyNotFoundException:
            # Key is no longer available, nothing left to clean
            pass

    log.info('Executing celery "clear_cache" startup script... done')
def pulse():
    """
    Update the heartbeats for the Current Routers

    For the storage router matching the local machine id, the 'process' heartbeat is saved
    and a ping task is scheduled. For every other storage router whose 'process' heartbeat
    is at least HeartBeat.ARP_TIMEOUT old, its ARP entry is deleted.

    :return: None
    """
    logger = LogHandler.get('extensions', name='heartbeat')
    machine_id = System.get_my_machine_id()
    current_time = int(time.time())
    for node in StorageRouterList.get_storagerouters():
        if node.machine_id == machine_id:
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                node_save = StorageRouter(node.guid)
                node_save.heartbeats['process'] = current_time
                node_save.save()
            StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
            continue

        # check timeout of other nodes and clear arp cache
        if not node.heartbeats or 'process' not in node.heartbeats:
            continue
        if current_time - node.heartbeats['process'] < HeartBeat.ARP_TIMEOUT:
            continue
        try:
            # Argument list instead of a shell string: the name is handed to arp untouched,
            # so no shell quoting/escaping is required
            check_output(['/usr/sbin/arp', '-d', node.name])
        except (CalledProcessError, OSError):
            # OSError: the arp binary could not be started. A shell would have reported this
            # through a non-zero exit code, so it is handled identically.
            logger.exception('Error clearing ARP cache')
def process_exception(self, request, exception):
    """
    Translates the given exception into a JSON HttpResponse

    - Own HTTP exceptions are returned with their own data and status code
    - MissingMandatoryFieldsException results in a 400 'invalid_data'
    - Anything else is logged and results in a 500 'internal_server'
    """
    _ = self, request
    logger = LogHandler.get('api', 'middleware')
    json_type = 'application/json'

    if OVSMiddleware.is_own_httpexception(exception):
        return HttpResponse(exception.data,
                            status=exception.status_code,
                            content_type=json_type)

    if isinstance(exception, MissingMandatoryFieldsException):
        error_code, status = 'invalid_data', 400
    else:
        logger.exception('An unhandled exception occurred: {0}'.format(exception))
        error_code, status = 'internal_server', 500

    body = json.dumps({'error': error_code,
                       'error_description': exception.message})
    return HttpResponse(body, status=status, content_type=json_type)
def log_slow_calls(f):
    """
    Decorator logging a warning for every call of the wrapped function taking more than 1s
    :param f: Function to wrap
    :return: Wrapped function
    """
    logger = LogHandler.get("extensions", name="etcdconfiguration")

    def _describe_key(args, kwargs):
        """
        Builds the key part of the log message out of the call arguments
        """
        if "key" in kwargs:
            return " (key: {0})".format(kwargs["key"])
        if len(args) > 0:
            return " (key: {0})".format(args[0])
        return ""

    def new_function(*args, **kwargs):
        """
        Execute function
        :return: Function output
        """
        start = time.time()
        try:
            return f(*args, **kwargs)
        finally:
            # Runs for successful as well as for failing calls
            key_info = _describe_key(args, kwargs)
            duration = time.time() - start
            if duration > 1:
                logger.warning("Call to {0}{1} took {2}s".format(f.__name__, key_info, duration))

    new_function.__name__ = f.__name__
    new_function.__module__ = f.__module__
    return new_function
def process_exception(self, request, exception):
    """
    Logs information about the given error (including the traceback); nothing is returned
    """
    _ = self, request
    middleware_logger = LogHandler.get('api', 'middleware')
    message = 'An unhandled exception occurred: {0}'.format(exception)
    middleware_logger.exception(message)
def new_function(*args, **kwargs):
    """
    Wrapped function: logs the call, executes the wrapped function and warns when the call
    took more than 5 seconds (only when log_slow is True)
    """
    request = _find_request(args)
    # Only the positional arguments following the request object are logged
    method_args = list(args)
    method_args = method_args[method_args.index(request) + 1:]

    # Log the call
    metadata = {'meta': dict((str(key), str(value)) for key, value in request.META.iteritems()),
                'request': dict((str(key), str(value)) for key, value in request.REQUEST.iteritems()),
                'cookies': dict((str(key), str(value)) for key, value in request.COOKIES.iteritems())}
    # Stripping password traces, credentials must never end up in the log in clear text
    for mtype in metadata:
        for key in metadata[mtype]:
            if 'password' in key:
                metadata[mtype][key] = '**********************'
    _logger = LogHandler.get('log', name='api')
    _logger.info('[{0}.{1}] - {2} - {3} - {4} - {5}'.format(
        f.__module__,
        f.__name__,
        getattr(request, 'client').user_guid if hasattr(request, 'client') else None,
        json.dumps(method_args),
        json.dumps(kwargs),
        json.dumps(metadata)
    ))

    # Call the function
    start = time.time()
    return_value = f(*args, **kwargs)
    duration = time.time() - start
    if duration > 5 and log_slow is True:
        logger.warning('API call {0}.{1} took {2}s'.format(f.__module__, f.__name__, round(duration, 2)))
    return return_value
def new_function(*args, **kwargs):
    """
    Wrapped function: logs the call metadata, runs the wrapped function and logs a warning
    when it took longer than 5 seconds (only when log_slow is True)
    """
    request = _find_request(args)
    # Everything positioned after the request object is considered a method argument
    method_args = list(args)
    method_args = method_args[method_args.index(request) + 1:]

    # Log the call
    metadata = {
        'meta': dict((str(key), str(value)) for key, value in request.META.iteritems()),
        'request': dict((str(key), str(value)) for key, value in request.REQUEST.iteritems()),
        'cookies': dict((str(key), str(value)) for key, value in request.COOKIES.iteritems())
    }
    # Stripping password traces: values of password-like keys are masked before logging
    for section in metadata.values():
        for key in section:
            if 'password' in key:
                section[key] = '**********************'

    user_guid = request.client.user_guid if hasattr(request, 'client') else None
    _logger = LogHandler.get('log', name='api')
    _logger.info('[{0}.{1}] - {2} - {3} - {4} - {5}'.format(
        f.__module__, f.__name__, user_guid,
        json.dumps(method_args), json.dumps(kwargs), json.dumps(metadata)))

    # Call the function
    start = time.time()
    return_value = f(*args, **kwargs)
    duration = time.time() - start
    if duration > 5 and log_slow is True:
        logger.warning('API call {0}.{1} took {2}s'.format(
            f.__module__, f.__name__, round(duration, 2)))
    return return_value
def new_function(self, request, *args, **kwargs):
    """
    Wrapped function: logs the call (values of password-like keys masked) and then
    executes the wrapped function
    """
    # Log the call
    sources = (('meta', request.META),
               ('request', request.REQUEST),
               ('cookies', request.COOKIES))
    metadata = {}
    for name, source in sources:
        metadata[name] = dict((str(key), str(value)) for key, value in source.iteritems())

    # Stripping password traces
    mask = '**********************'
    for section in metadata.values():
        for key in section:
            if 'password' in key:
                section[key] = mask

    user_guid = request.client.user_guid if hasattr(request, 'client') else None
    _logger = LogHandler.get('log', name='api')
    _logger.info('[{0}.{1}] - {2} - {3} - {4} - {5}'.format(f.__module__,
                                                             f.__name__,
                                                             user_guid,
                                                             json.dumps(list(args)),
                                                             json.dumps(kwargs),
                                                             json.dumps(metadata)))

    # Call the function
    return f(self, request, *args, **kwargs)
class OSManager(object):
    """
    Factory class returning specialized classes

    Attribute lookups on the class that are not resolved the regular way are forwarded to
    the implementation class, which is selected based on the contents of /etc/os-release
    the first time it is needed.
    """
    ImplementationClass = None
    _logger = LogHandler.get('extensions', name='osmanager')

    class MetaClass(type):
        """
        Metaclass
        """

        def __getattr__(cls, item):
            """
            Returns the requested attribute of the appropriate implementation class
            """
            _ = cls
            if OSManager.ImplementationClass is not None:
                return getattr(OSManager.ImplementationClass, item)

            try:
                dist_info = check_output('cat /etc/os-release', shell=True)
                # All OS distribution classes used in below code should share the exact same interface!
                if 'Ubuntu' in dist_info:
                    OSManager.ImplementationClass = Ubuntu
                elif 'CentOS Linux' in dist_info:
                    OSManager.ImplementationClass = Centos
                else:
                    raise RuntimeError('There was no known OSManager detected')
            except Exception as ex:
                OSManager._logger.exception('Error loading OSManager: {0}'.format(ex))
                raise
            return getattr(OSManager.ImplementationClass, item)

    __metaclass__ = MetaClass
def relay(*args, **kwargs):
    """
    Relays any call to another node.
    Assume this example:
    * A user wants to execute a HTTP GET on /api/storagerouters/
    ** /api/<call>
    * He'll have to execute a HTTP GET on /api/relay/<call>
    ** This will translate to /api/relay/storagerouters/
    Parameters:
    * Mandatory: ip, port, client_id, client_secret
    * All other parameters will be passed through to the specified node

    Errors are never raised to the caller: they are translated into a JSON HttpResponse.
    """

    @authenticated()
    @required_roles(['read'])
    @load()
    def _relay(_, ip, port, client_id, client_secret, raw_version, request):
        path = '/{0}'.format(request.path.replace('/api/relay/', ''))
        method = request.META['REQUEST_METHOD'].lower()
        client = OVSClient(ip, port, credentials=(client_id, client_secret), version=raw_version, raw_response=True)
        if not hasattr(client, method):
            raise HttpBadRequestException(error='unavailable_call',
                                          error_description='Method not available in relay')
        client_kwargs = {'params': request.GET}
        if method != 'get':
            client_kwargs['data'] = request.POST
        call_response = getattr(client, method)(path, **client_kwargs)
        response = HttpResponse(call_response.text,
                                content_type='application/json',
                                status=call_response.status_code)
        # Hand over the headers of the relayed response and mark the response as relayed
        for header, value in call_response.headers.iteritems():
            response[header] = value
        response['OVS-Relay'] = '{0}:{1}'.format(ip, port)
        return response

    try:
        return _relay(*args, **kwargs)
    except Exception as ex:
        if OVSMiddleware.is_own_httpexception(ex):
            return HttpResponse(ex.data,
                                status=ex.status_code,
                                content_type='application/json')
        message = str(ex)
        status_code = 400
        if hasattr(ex, 'detail'):
            message = ex.detail
        if hasattr(ex, 'status_code'):
            status_code = ex.status_code
        logger = LogHandler.get('api', name='metadata')
        logger.exception('Error relaying call: {0}'.format(message))
        return HttpResponse(json.dumps({'error': 'relay_error',
                                        'error_description': message,
                                        # Misspelled legacy key, kept for clients relying on it
                                        'error_descirption': message}),
                            content_type='application/json',
                            status=status_code)
def limit(amount, per, timeout):
    """
    Rate-limits the decorated call: more than `amount` calls within `per` seconds (per
    decorated function and per HTTP_X_REAL_IP) block further calls for `timeout` seconds
    """
    logger = LogHandler.get('api', 'oauth2')

    def wrap(f):
        """
        Wrapper function
        """

        @wraps(f)
        def new_function(self, request, *args, **kwargs):
            """
            Wrapped function
            """
            now = time.time()
            key = 'ovs_api_limit_{0}.{1}_{2}'.format(f.__module__, f.__name__, request.META['HTTP_X_REAL_IP'])
            client = VolatileFactory.get_client()
            mutex = volatile_mutex(key)
            try:
                mutex.acquire()
                rate_info = client.get(key, {'calls': [], 'timeout': None})
                blocked_until = rate_info['timeout']
                if blocked_until is not None and blocked_until > now:
                    remaining = blocked_until - now
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, remaining))
                    raise HttpTooManyRequestsException(error='rate_limit_timeout',
                                                       error_description='Rate limit timeout ({0}s remaining)'.format(round(remaining, 2)))
                # No timeout active (anymore)
                rate_info['timeout'] = None
                # Only keep the calls inside the observed window, and add the current one
                recent_calls = [stamp for stamp in rate_info['calls'] if stamp > (now - per)]
                recent_calls.append(now)
                rate_info['calls'] = recent_calls
                calls = len(recent_calls)
                if calls > amount:
                    rate_info['timeout'] = now + timeout
                    client.set(key, rate_info)
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
                    raise HttpTooManyRequestsException(error='rate_limit_reached',
                                                       error_description='Rate limit reached ({0} in last {1}s)'.format(calls, per))
                client.set(key, rate_info)
            finally:
                mutex.release()
            return f(self, request, *args, **kwargs)

        return new_function

    return wrap
class MetadataServerClient(object):
    """
    Builds a MDSClient

    Clients are kept in mdsclient_service_cache, keyed by the guid of the service.
    """
    _logger = LogHandler.get('extensions', name='storagedriver')
    # Logging of the storagerouterclient is configured and enabled when the class is defined
    storagerouterclient.Logger.setupLogging(LogHandler.load_path('storagerouterclient'))
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    MDS_ROLE = type('MDSRole', (), {'MASTER': Role.Master,
                                    'SLAVE': Role.Slave})

    def __init__(self):
        """
        Dummy init method
        """
        pass

    @staticmethod
    def load(service):
        """
        Loads a MDSClient
        :param service: Service for which the MDSClient needs to be loaded
        :return: The (cached) MDSClient, or None when creating the client raised a RuntimeError
        """
        if service.storagerouter is None:
            raise ValueError('MDS service {0} does not have a Storage Router linked to it'.format(service.name))

        cache_key = service.guid
        if cache_key in mdsclient_service_cache:
            return mdsclient_service_cache[cache_key]

        try:
            node_config = MDSNodeConfig(address=str(service.storagerouter.ip),
                                        port=service.ports[0])
            # noinspection PyArgumentList
            mdsclient_service_cache[cache_key] = MDSClient(node_config)
        except RuntimeError as ex:
            MetadataServerClient._logger.error('Error loading MDSClient on {0}: {1}'.format(service.storagerouter.ip, ex))
            return None
        return mdsclient_service_cache[cache_key]
def __init__(self, config_type, vpool_guid, storagedriver_id):
    """
    Initializes the class
    :param config_type: Type of configuration, only 'storagedriver' is allowed
    :param vpool_guid: Guid of the vPool, part of the configuration key
    :param storagedriver_id: ID of the storagedriver, part of the configuration key
    """
    if config_type != 'storagedriver':
        raise RuntimeError('Invalid configuration type. Allowed: storagedriver')

    log_path = LogHandler.load_path('storagerouterclient')
    storagerouterclient.Logger.setupLogging(log_path)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    self._logger = LogHandler.get('extensions', name='storagedriver')
    self.config_type = config_type
    self.configuration = {}
    self.key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool_guid, storagedriver_id)
    full_path = Configuration.get_configuration_path(self.key)
    self.remote_path = full_path.strip('/')
    self.is_new = True
    self.dirty_entries = []
class SupportAgent(object):
    """
    Represents the Support client
    """
    _logger = LogHandler.get('support', name='agent')

    def __init__(self):
        """
        Initializes the client
        :raises RuntimeError: When neither upstart nor systemd is detected as service manager
        """
        self._enable_support = Configuration.get('/ovs/framework/support|enablesupport')
        self.interval = Configuration.get('/ovs/framework/support|interval')
        self._url = 'https://monitoring.openvstorage.com/api/support/heartbeat/'

        init_info = check_output('cat /proc/1/comm', shell=True)
        # All service classes used in below code should share the exact same interface!
        if 'init' in init_info:
            version_info = check_output('init --version', shell=True)
            if 'upstart' in version_info:
                self.servicemanager = 'upstart'
            else:
                # The exception has to be raised: otherwise self.servicemanager stays unset
                # and the failure only surfaces later as an AttributeError
                raise RuntimeError('There was no known service manager detected in /proc/1/comm')
        elif 'systemd' in init_info:
            self.servicemanager = 'systemd'
        else:
            raise RuntimeError('There was no known service manager detected in /proc/1/comm')

    def get_heartbeat_data(self):
        """
        Returns heartbeat data
        Failures while collecting a part of the data are not raised, their message is added
        to the 'errors' list instead.
        :return: Dictionary with keys 'cid', 'nid', 'metadata' and 'errors'
        :rtype: dict
        """
        data = {'cid': Configuration.get('/ovs/framework/cluster_id'),
                'nid': System.get_my_machine_id(),
                'metadata': {},
                'errors': []}

        try:
            # Versions
            manager = PackageFactory.get_manager()
            data['metadata']['versions'] = manager.get_installed_versions()
        except Exception as ex:
            data['errors'].append(str(ex))

        try:
            if self.servicemanager == 'upstart':
                services = check_output('initctl list | grep ovs-', shell=True).strip().splitlines()
            else:
                services = check_output('systemctl -l | grep ovs- | tr -s " "', shell=True).strip().splitlines()
            # Service status
            service_data = {}
            for service in services:
                split = service.strip().split(' ')
                split = [part.strip() for part in split if part.strip()]
                # Drop everything in front of the service name
                while split and not split[0].strip().startswith('ovs-'):
                    split.pop(0)
                if not split:
                    # No part of this line starts with 'ovs-'; skip the line instead of
                    # losing the status of all services on an IndexError
                    continue
                service_data[split[0]] = ' '.join(split[1:])
            data['metadata']['services'] = service_data
        except Exception as ex:
            data['errors'].append(str(ex))

        return data
def __init__(self, nodes):
    """
    Initializes the client
    :param nodes: Nodes handed over to the memcache client
    """
    self._logger = LogHandler.get('extensions', 'memcache store')
    self._nodes = nodes
    memcache_options = {'cache_cas': True,
                        'socket_timeout': 0.5}
    self._client = memcache.Client(self._nodes, **memcache_options)
    self._lock = Lock()
    self._validate = True
def manage_running_tasks(tasklist, timesleep=10):
    """
    Manage a list of running celery task
    - discard PENDING tasks after a certain timeout
    - validate RUNNING tasks are actually running
    Entries are removed from the passed ``tasklist`` as soon as their task finished or got revoked.
    :param tasklist: Dictionary of tasks to wait {IP address: AsyncResult}
    :type tasklist: dict
    :param timesleep: Sleep between checks -
      -for long running tasks it's better to sleep for a longer period of time to reduce number of ssh calls
    :type timesleep: int
    :return: Results per IP address and the list of IP addresses for which the task got revoked
    :rtype: tuple
    """
    logger = LogHandler.get('lib', name='celery toolbox')
    ssh_clients = {}
    tasks_pending = {}
    tasks_pending_timeout = 1800  # 30 minutes
    results = {}
    failed_nodes = []
    # NOTE(review): tasks in a state other than SUCCESS/FAILURE/PENDING/STARTED (e.g. revoked
    # by someone else) are never removed and keep this loop running - confirm whether intended
    while tasklist:
        # Iterate over a snapshot, as entries get deleted from tasklist while looping
        for ip, task in list(tasklist.items()):
            if task.state in ('SUCCESS', 'FAILURE'):
                logger.info('Task {0} finished: {1}'.format(task.id, task.state))
                results[ip] = task.get(propagate=False)
                del tasklist[ip]
            elif task.state == 'PENDING':
                if task.id not in tasks_pending:
                    tasks_pending[task.id] = time.time()
                else:
                    task_pending_since = tasks_pending[task.id]
                    if time.time() - task_pending_since > tasks_pending_timeout:
                        logger.warning('Task {0} is pending since {1} on node {2}. Task will be revoked'.format(task.id, datetime.datetime.fromtimestamp(task_pending_since), ip))
                        revoke(task.id)
                        del tasklist[ip]
                        del tasks_pending[task.id]
                        failed_nodes.append(ip)
            elif task.state == 'STARTED':
                if ip not in ssh_clients:
                    ssh_clients[ip] = SSHClient(ip, username='******')
                client = ssh_clients[ip]
                if ServiceManager.get_service_status('workers', client) is False:
                    logger.error('Service ovs-workers on node {0} appears halted while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                    revoke(task.id)
                    del tasklist[ip]
                    failed_nodes.append(ip)
                else:
                    # ping() yields None when not a single worker replied
                    ping_result = task.app.control.inspect().ping() or {}
                    storage_router = StorageRouterList.get_by_ip(ip)
                    # A node without a known storage router cannot be matched to a worker
                    # and is therefore treated as not reachable
                    reachable = storage_router is not None and "celery@{0}".format(storage_router.name) in ping_result
                    if not reachable:
                        logger.error('Service ovs-workers on node {0} is not reachable via rabbitmq while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                        revoke(task.id)
                        del tasklist[ip]
                        failed_nodes.append(ip)
        if tasklist:
            time.sleep(timesleep)
    return results, failed_nodes
def __init__(self, name, wait=None):
    """
    Creates a volatile mutex object
    :param name: Name of the mutex
    :param wait: Stored as-is in self._wait (defaults to None)
    """
    self.name = name
    self._wait = wait
    # Initial state: lock not held
    self._has_lock = False
    self._start = 0
    self._logger = LogHandler.get('extensions', 'volatile mutex')
    self._volatile = VolatileFactory.get_client()
def task_postrun_handler(sender=None, task_id=None, task=None, args=None, kwargs=None, **kwds):
    """
    Hook for celery postrun event: fires a TASK_COMPLETE message for the given task id.
    Errors while firing the message are logged, not raised.
    """
    _ = sender, task, args, kwargs, kwds
    try:
        message_type = MessageController.Type.TASK_COMPLETE
        MessageController.fire(message_type, task_id)
    except Exception as ex:
        LogHandler.get('celery', name='celery').error('Caught error during postrun handler: {0}'.format(ex))
def relay(*args, **kwargs):
    """
    Relays any call to another node.
    Assume this example:
    * A user wants to execute a HTTP GET on /api/storagerouters/
    ** /api/<call>
    * He'll have to execute a HTTP GET on /api/relay/<call>
    ** This will translate to /api/relay/storagerouters/
    Parameters:
    * Mandatory: ip, port, client_id, client_secret
    * All other parameters will be passed through to the specified node

    Any error is translated into a JSON HttpResponse instead of being raised.
    """
    json_type = 'application/json'

    @authenticated()
    @required_roles(['read'])
    @load()
    def _relay(_, ip, port, client_id, client_secret, raw_version, request):
        path = '/{0}'.format(request.path.replace('/api/relay/', ''))
        method = request.META['REQUEST_METHOD'].lower()
        client = OVSClient(ip, port, credentials=(client_id, client_secret), version=raw_version, raw_response=True)
        if not hasattr(client, method):
            raise HttpBadRequestException(error='unavailable_call', error_description='Method not available in relay')
        # Query parameters are always passed on, the POST data only for non-GET calls
        client_kwargs = {'params': request.GET}
        if method != 'get':
            client_kwargs['data'] = request.POST
        call_response = getattr(client, method)(path, **client_kwargs)
        response = HttpResponse(call_response.text, content_type=json_type, status=call_response.status_code)
        for header, value in call_response.headers.iteritems():
            response[header] = value
        response['OVS-Relay'] = '{0}:{1}'.format(ip, port)
        return response

    try:
        return _relay(*args, **kwargs)
    except Exception as ex:
        if OVSMiddleware.is_own_httpexception(ex):
            return HttpResponse(ex.data, status=ex.status_code, content_type=json_type)
        message = str(ex)
        status_code = 400
        if hasattr(ex, 'detail'):
            message = ex.detail
        if hasattr(ex, 'status_code'):
            status_code = ex.status_code
        logger = LogHandler.get('api', name='metadata')
        logger.exception('Error relaying call: {0}'.format(message))
        payload = {'error': 'relay_error',
                   'error_description': message}
        # The misspelled key used to be the only one carrying the message; it is kept so
        # clients reading it continue to work
        payload['error_descirption'] = message
        return HttpResponse(json.dumps(payload), content_type=json_type, status=status_code)
def __init__(self, *args, **kwargs):
    """
    Initializes the distributed scheduler
    All own attributes are set before the initialization of the parent class is executed
    """
    beat_logger = LogHandler.get('celery', name='celery beat')
    self._logger = beat_logger
    self._persistent = PersistentFactory.get_client()
    self._namespace = 'ovs_celery_beat'
    self._mutex = volatile_mutex('celery_beat', 10)
    self._has_lock = False
    super(DistributedScheduler, self).__init__(*args, **kwargs)
    beat_logger.debug('DS init')
def _log(task, kwargs, storagedriver_id):
    """
    Log an event
    :param task: Task of which the module and name of its class are logged
    :param kwargs: Arguments of the event, logged as JSON
    :param storagedriver_id: ID of the storagedriver of which the guid is logged as metadata
    """
    storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
    metadata = {'storagedriver': storagedriver.guid}
    event_logger = LogHandler.get('log', name='volumedriver_event')
    task_class = task.__class__
    event_logger.info('[{0}.{1}] - {2} - {3}'.format(task_class.__module__,
                                                     task_class.__name__,
                                                     json.dumps(kwargs),
                                                     json.dumps(metadata)))
def __init__(self, *args, **kwargs):
    """
    Initializes the distributed scheduler
    Own state is prepared first, the parent class is initialized afterwards
    """
    self._logger = LogHandler.get('celery', name='celery beat')
    self._persistent = PersistentFactory.get_client()
    self._namespace = 'ovs_celery_beat'
    self._mutex = volatile_mutex('celery_beat', 10)
    # Nothing scheduled and no lock held at start
    self._schedule_info = {}
    self._has_lock = False

    parent = super(DistributedScheduler, self)
    parent.__init__(*args, **kwargs)
    self._logger.debug('DS init')
def __init__(self, config_type, vpool_guid, storagedriver_id):
    """
    Initializes the class
    :raises RuntimeError: When config_type is anything else than 'storagedriver'
    """
    allowed_type = 'storagedriver'
    if config_type != allowed_type:
        raise RuntimeError('Invalid configuration type. Allowed: storagedriver')

    client_logger = storagerouterclient.Logger
    client_logger.setupLogging(LogHandler.load_path('storagerouterclient'))
    # noinspection PyArgumentList
    client_logger.enableLogging()

    self._logger = LogHandler.get('extensions', name='storagedriver')
    self.config_type = config_type
    self.configuration = {}
    # Location of the configuration of this storagedriver
    self.key = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool_guid, storagedriver_id)
    self.remote_path = Configuration.get_configuration_path(self.key).strip('/')
    self.is_new = True
    self.dirty_entries = []
def __init__(self, config_type, vpool_guid, storagedriver_id):
    """
    Initializes the class
    For every section of the parameters a configure_<section> method is generated, which
    forwards its keyword arguments to self._add
    :raises RuntimeError: When config_type is anything else than 'storagedriver'
    """

    def make_configure(sct):
        """
        section closure
        :param sct: Section to create configure function for
        """
        return lambda **kwargs: self._add(sct, **kwargs)

    if config_type != 'storagedriver':
        raise RuntimeError('Invalid configuration type. Allowed: storagedriver')

    log_path = LogHandler.load_path('storagerouterclient')
    storagerouterclient.Logger.setupLogging(log_path)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    self._logger = LogHandler.get('extensions', name='storagedriver')
    self.config_type = config_type
    self.configuration = {}
    # The doubled braces leave a '{0}' placeholder at the end of the path
    self.path = '/ovs/vpools/{0}/hosts/{1}/config/{{0}}'.format(vpool_guid, storagedriver_id)
    base_path = self.path.format('')
    self.remote_path = 'etcd://127.0.0.1:2379{0}'.format(base_path).strip('/')
    self.is_new = True
    self.dirty_entries = []
    # Never use parameters directly
    self.params = copy.deepcopy(StorageDriverConfiguration.parameters)

    # Fix some manual "I know what I'm doing" overrides
    type_params = self.params[self.config_type]
    type_params['backend_connection_manager']['optional'].append('s3_connection_strict_consistency')

    # Generate configure_* methods
    for section in self.params[self.config_type]:
        method_name = 'configure_{0}'.format(section)
        setattr(self, method_name, make_configure(section))
def _log(task, kwargs, storagedriver_id):
    """
    Log an event: module and name of the task's class, the arguments, and the guid of the
    storagedriver with the given ID
    """
    metadata = dict(storagedriver=StorageDriverList.get_by_storagedriver_id(storagedriver_id).guid)
    _logger = LogHandler.get('log', name='volumedriver_event')
    message_parts = (task.__class__.__module__,
                     task.__class__.__name__,
                     json.dumps(kwargs),
                     json.dumps(metadata))
    _logger.info('[{0}.{1}] - {2} - {3}'.format(*message_parts))
def __init__(self, host, login, passwd):
    """
    Initializes the SDK
    :param host: Host of which the SDK service is used
    :param login: Username, stored for logging in
    :param passwd: Password, stored for logging in
    """
    self._logger = LogHandler.get('extensions', name='vmware sdk')
    self._host = host
    self._username = login
    self._password = passwd
    self._sessionID = None
    self._check_session = True

    self._cache = ObjectCache()
    self._cache.setduration(weeks=1)

    wsdl_url = 'https://{0}/sdk/vimService?wsdl'.format(host)
    self._client = Client(wsdl_url, cache=self._cache, cachingpolicy=1)
    self._client.set_options(location='https://{0}/sdk'.format(host),
                             plugins=[ValueExtender()])

    service_reference = self._build_property('ServiceInstance')
    self._serviceContent = self._client.service.RetrieveServiceContent(service_reference)

    # In case of an ESXi host, this would be 'HostAgent'
    self.is_vcenter = self._serviceContent.about.apiType == 'VirtualCenter'
    if self.is_vcenter:
        # @TODO: We need to extend all calls to specify the ESXi host where the action needs to be executed.
        # We cannot just assume an ESXi host here, as this is important for certain calls like creating a VM.
        self._esxHost = None
        return

    self._login()
    # Traversal built inside-out: folder -> datacenter -> host folder -> compute resource -> host
    compute_resource_spec = {'name': 'ComputeResourceTravelSpec',
                             'type': 'ComputeResource',
                             'path': 'host'}
    host_folder_spec = {'name': 'DFolderTraversalSpec',
                        'type': 'Folder',
                        'path': 'childEntity',
                        'traversal': compute_resource_spec}
    datacenter_spec = {'name': 'DatacenterTraversalSpec',
                       'type': 'Datacenter',
                       'path': 'hostFolder',
                       'traversal': host_folder_spec}
    folder_spec = {'name': 'FolderTraversalSpec',
                   'type': 'Folder',
                   'path': 'childEntity',
                   'traversal': datacenter_spec}
    host_system = self._get_object(self._serviceContent.rootFolder,
                                   prop_type='HostSystem',
                                   traversal=folder_spec,
                                   properties=['name'])
    self._esxHost = host_system.obj_identifier
def __init__(self):
    """
    Prepares the logger, the location of the exports file and the commands (executed
    through sudo as root) for exportfs and rpc.mountd
    """
    self._logger = LogHandler.get('extensions', name='exportfs')
    self._exports_file = '/etc/exports'

    as_root = ['/usr/bin/sudo', '-u', 'root']
    exportfs = '/usr/sbin/exportfs'
    # Every command gets its own list object
    self._cmd = as_root + [exportfs]
    self._restart = as_root + [exportfs, '-ra']
    self._rpcmountd_stop = as_root + ['pkill', 'rpc.mountd']
    self._rpcmountd_start = as_root + ['/usr/sbin/rpc.mountd', '--manage-gids']
def limit(amount, per, timeout):
    """
    Rate-limits the decorated call: once more than `amount` calls are made within `per`
    seconds (per decorated function and per HTTP_X_REAL_IP), calls raise Throttled for
    `timeout` seconds
    """
    logger = LogHandler.get('api')

    def wrap(f):
        """
        Wrapper function
        """

        def new_function(*args, **kwargs):
            """
            Wrapped function
            """
            request = _find_request(args)
            now = time.time()
            key = 'ovs_api_limit_{0}.{1}_{2}'.format(f.__module__, f.__name__, request.META['HTTP_X_REAL_IP'])
            client = VolatileFactory.get_client()
            with volatile_mutex(key):
                rate_info = client.get(key, {'calls': [], 'timeout': None})
                throttled_until = rate_info['timeout']
                if throttled_until is not None and throttled_until > now:
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, throttled_until - now))
                    raise Throttled(wait=throttled_until - now)
                # No (active) timeout
                rate_info['timeout'] = None
                window_start = now - per
                rate_info['calls'] = [call for call in rate_info['calls'] if call > window_start] + [now]
                exceeded = len(rate_info['calls']) > amount
                if exceeded:
                    rate_info['timeout'] = now + timeout
                client.set(key, rate_info)
                if exceeded:
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
                    raise Throttled(wait=timeout)
            return f(*args, **kwargs)

        new_function.__name__ = f.__name__
        new_function.__module__ = f.__module__
        return new_function

    return wrap
def limit(amount, per, timeout):
    """
    Decorator factory that rate-limits the decorated call
    At most `amount` calls are accepted within a window of `per` (compared against time.time() differences).
    When exceeded, an HttpTooManyRequestsException is raised and further calls are refused until `timeout` has elapsed.
    The bookkeeping is kept per decorated function and per 'HTTP_X_REAL_IP' value of the request.
    """
    logger = LogHandler.get('api')

    def wrap(f):
        """
        Wraps f with the rate-limit check
        """
        @wraps(f)
        def new_function(*args, **kwargs):
            """
            Validates the call history for this caller, then executes f
            """
            request = _find_request(args)
            now = time.time()
            key = 'ovs_api_limit_{0}.{1}_{2}'.format(f.__module__, f.__name__, request.META['HTTP_X_REAL_IP'])
            client = VolatileFactory.get_client()
            with volatile_mutex(key):
                rate_info = client.get(key, {'calls': [], 'timeout': None})
                blocked_until = rate_info['timeout']
                if blocked_until is not None:
                    if blocked_until > now:
                        # Still inside an earlier penalty window
                        remaining = blocked_until - now
                        logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, remaining))
                        raise HttpTooManyRequestsException(error='rate_limit_timeout',
                                                           error_description='Rate limit timeout ({0}s remaining)'.format(round(remaining, 2)))
                    rate_info['timeout'] = None
                # Keep only the calls inside the sliding window and register the current one
                window_start = now - per
                recent_calls = [stamp for stamp in rate_info['calls'] if stamp > window_start]
                recent_calls.append(now)
                rate_info['calls'] = recent_calls
                calls = len(recent_calls)
                if calls > amount:
                    rate_info['timeout'] = now + timeout
                    client.set(key, rate_info)
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
                    raise HttpTooManyRequestsException(error='rate_limit_reached',
                                                       error_description='Rate limit reached ({0} in last {1}s)'.format(calls, per))
                client.set(key, rate_info)
            return f(*args, **kwargs)

        return new_function
    return wrap
class LicenseController(object):
    """
    Validates licenses
    """
    _logger = LogHandler.get('lib', name='license')

    @staticmethod
    @celery.task(name='ovs.license.validate')
    def validate(license_string):
        """
        Validates a license with the various components
        :param license_string: Encoded license, decoded through LicenseController._decode
        :return: Dict keyed by component. The value is False when the component's license is expired, invalid or
                 cannot be validated, else a dict with the keys 'valid_until', 'metadata' and 'name'
        :raises Exception: Any unexpected error is logged and re-raised
        """
        try:
            result = {}
            data = LicenseController._decode(license_string)
            for component in data:
                cdata = data[component]
                name = cdata['name']
                # Use a dedicated name here: rebinding 'data' would make the lookup above read from the
                # previous component's payload on the next iteration
                component_data = cdata['data']
                _ = cdata['token']  # Not used, but its presence is mandatory (KeyError otherwise)
                valid_until = float(cdata['valid_until']) if 'valid_until' in cdata else None
                if valid_until is not None and valid_until <= time.time():
                    # Expired
                    result[component] = False
                    continue
                signature = cdata['signature'] if 'signature' in cdata else None
                validate_functions = Toolbox.fetch_hooks('license', '{0}.validate'.format(component))
                apply_functions = Toolbox.fetch_hooks('license', '{0}.apply'.format(component))
                if len(validate_functions) == 1 and len(apply_functions) == 1:
                    try:
                        valid, metadata = validate_functions[0](component=component, data=component_data, signature=signature)
                    except Exception as ex:
                        # A failing hook is treated as an invalid license for this component only
                        LicenseController._logger.debug('Error validating license for {0}: {1}'.format(component, ex))
                        valid = False
                        metadata = None
                    if valid is False:
                        LicenseController._logger.debug('Invalid license for {0}: {1}'.format(component, license_string))
                        result[component] = False
                    else:
                        result[component] = {'valid_until': valid_until,
                                             'metadata': metadata,
                                             'name': name}
                else:
                    LicenseController._logger.debug('No validate nor apply functions found for {0}'.format(component))
                    result[component] = False
            return result
        except Exception as ex:
            LicenseController._logger.exception('Error validating license: {0}'.format(ex))
            raise
def __init__(self, name, wait=None):
    """
    Creates a file mutex object
    :param name: Name of the mutex, stored on the instance before self.key() is called
    :param wait: Value stored as self._wait
    """
    self._logger = LogHandler.get('extensions', 'file mutex')
    self.name = name
    self._has_lock = False
    self._start = 0
    self._handle = open(self.key(), 'w')
    self._wait = wait

    # Read/write for user, group and others
    read_write_for_all = (stat.S_IRUSR | stat.S_IWUSR |
                          stat.S_IRGRP | stat.S_IWGRP |
                          stat.S_IROTH | stat.S_IWOTH)
    try:
        os.chmod(self.key(), read_write_for_all)
    except OSError:
        # Best effort: failing to change the mode is ignored
        pass
def __init__(self, host='127.0.0.1', login='******'):
    """
    Initializes the SDK: state lookup table, connection placeholders and an SSH client towards the host
    :param host: Host to work with, also used for the SSH client
    :param login: Login stored on the instance
    """
    self._logger = LogHandler.get('extensions', name='kvm sdk')
    self._logger.debug('Init libvirt')

    # Maps libvirt domain state constants onto the labels used by this class
    state_labels = ((libvirt.VIR_DOMAIN_NOSTATE, 'NO STATE'),
                    (libvirt.VIR_DOMAIN_RUNNING, 'RUNNING'),
                    (libvirt.VIR_DOMAIN_BLOCKED, 'BLOCKED'),
                    (libvirt.VIR_DOMAIN_PAUSED, 'PAUSED'),
                    (libvirt.VIR_DOMAIN_SHUTDOWN, 'SHUTDOWN'),
                    (libvirt.VIR_DOMAIN_SHUTOFF, 'TURNEDOFF'),
                    (libvirt.VIR_DOMAIN_CRASHED, 'CRASHED'))
    self.states = dict(state_labels)

    self.libvirt = libvirt
    self.host, self.login = host, login
    self._conn = None
    self.ssh_client = SSHClient(self.host, username='******')
    self._logger.debug('Init complete')
def new_function(*args, **kwargs):
    """
    Logs the incoming call (masking password fields), executes the wrapped function and reports slow calls
    """
    def _as_strings(mapping):
        """
        Returns a dict with the stringified keys and values of the given mapping
        """
        return dict((str(name), str(content)) for name, content in mapping.iteritems())

    request = _find_request(args)
    logging_start = time.time()
    # Only the arguments following the request object are logged
    positional = list(args)
    method_args = positional[positional.index(request) + 1:]

    # Log the call
    metadata = {}
    metadata['meta'] = _as_strings(request.META)
    metadata['request'] = _as_strings(request.REQUEST)
    metadata['cookies'] = _as_strings(request.COOKIES)
    # Stripping password traces
    for section in metadata.itervalues():
        for field in section:
            if 'password' in field:
                section[field] = '**********************'
    _logger = LogHandler.get('log', name='api')
    if hasattr(request, 'client'):
        user_guid = getattr(request, 'client').user_guid
    else:
        user_guid = None
    log_entry = '[{0}.{1}] - {2} - {3} - {4} - {5}'.format(f.__module__,
                                                           f.__name__,
                                                           user_guid,
                                                           json.dumps(method_args),
                                                           json.dumps(kwargs),
                                                           json.dumps(metadata))
    _logger.info(log_entry)
    logging_duration = time.time() - logging_start

    # Call the function
    start = time.time()
    return_value = f(*args, **kwargs)
    duration = time.time() - start
    if duration > 5 and log_slow is True:
        logger.warning('API call {0}.{1} took {2}s'.format(f.__module__, f.__name__, round(duration, 2)))
    if isinstance(return_value, OVSResponse):
        return_value.timings['logging'] = [logging_duration, 'Logging']
    return return_value
def __init__(self, name, wait=None):
    """
    Creates a file mutex object
    :param name: Name of the mutex, stored on the instance before self.key() is called
    :param wait: Value stored as self._wait
    """
    self._logger = LogHandler.get('extensions', 'file mutex')
    self.name = name
    self._has_lock = False
    self._start = 0
    self._handle = open(self.key(), 'w')
    self._wait = wait

    # Read/write for user, group and others
    read_write_for_all = (stat.S_IRUSR | stat.S_IWUSR |
                          stat.S_IRGRP | stat.S_IWGRP |
                          stat.S_IROTH | stat.S_IWOTH)
    try:
        os.chmod(self.key(), read_write_for_all)
    except OSError:
        # Best effort: failing to change the mode is ignored
        pass
def new_function(*args, **kwargs):
    """
    Wrapped function: logs the call with its arguments, then executes it
    :param args: Arguments without default values
    :param kwargs: Arguments with default values
    """
    # Log the call
    metadata = {}
    if event_type == 'VOLUMEDRIVER_TASK' and 'storagedriver_id' in kwargs:
        storagedriver = StorageDriverList.get_by_storagedriver_id(kwargs['storagedriver_id'])
        metadata['storagedriver'] = storagedriver.guid
    _logger = LogHandler.get('log', name=event_type.lower())
    log_entry = '[{0}.{1}] - {2} - {3} - {4}'.format(function.__module__,
                                                     function.__name__,
                                                     json.dumps(list(args)),
                                                     json.dumps(kwargs),
                                                     json.dumps(metadata))
    _logger.info(log_entry)

    # Call the function
    return function(*args, **kwargs)
def process_exception(self, request, exception):
    """
    Translates an exception into a JSON HttpResponse; unhandled exceptions are logged and answered with a 500
    :param request: Request that triggered the exception (unused)
    :param exception: The exception that was raised
    :return: HttpResponse describing the error
    """
    _ = self, request
    logger = LogHandler.get('api', 'middleware')

    def _error_response(error_code, status_code):
        """
        Builds the JSON error response using the message of the handled exception
        """
        body = json.dumps({'error': error_code,
                           'error_description': exception.message})
        return HttpResponse(body, status=status_code, content_type='application/json')

    if OVSMiddleware.is_own_httpexception(exception):
        # Own HTTP exceptions carry their own payload and status code
        return HttpResponse(exception.data,
                            status=exception.status_code,
                            content_type='application/json')
    if isinstance(exception, MissingMandatoryFieldsException):
        return _error_response('invalid_data', 400)
    logger.exception('An unhandled exception occurred: {0}'.format(exception))
    return _error_response('internal_server', 500)
def load_ovs_logger(**kwargs):
    """
    Replace the 'logger' entry of the passed keyword arguments with the OVS celery logger.

    Nothing happens when no 'logger' keyword argument was passed.
    """
    if 'logger' not in kwargs:
        return
    kwargs['logger'] = LogHandler.get('celery', name='celery')
def create_cluster(cluster_name, cluster_type, ip, base_dir, plugins=None, locked=True, internal=True, filesystem=False, ports=None):
    """
    Creates a new single-node cluster and flags its usage according to the internal flag
    :param cluster_name: Name of the cluster
    :type cluster_name: str
    :param cluster_type: Type of the cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
    :type cluster_type: str
    :param ip: IP address of the first node of the new cluster
    :type ip: str
    :param base_dir: Base directory that should contain the data and tlogs
    :type base_dir: str
    :param plugins: Plugins that should be added to the configuration file
    :type plugins: dict
    :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
    :type locked: bool
    :param internal: Is cluster internally managed by OVS
    :type internal: bool
    :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
    :type filesystem: bool
    :param ports: A list of ports to be used for this cluster's node
    :type ports: list
    :return: Dict with the keys 'metadata', 'client_port', 'messaging_port' and 'service_metadata'
    :rtype: dict
    """
    supported_types = ServiceType.ARAKOON_CLUSTER_TYPES
    if cluster_type not in supported_types:
        raise ValueError('Cluster type {0} is not supported. Please choose from {1}'.format(cluster_type, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))

    client = SSHClient(ip, username=ArakoonInstaller.SSHCLIENT_USER)
    # Refuse to overwrite an existing cluster with the same name
    if filesystem is True:
        exists = client.file_exists(ArakoonClusterConfig.CONFIG_FILE.format(cluster_name))
    else:
        exists = Configuration.dir_exists('/ovs/arakoon/{0}'.format(cluster_name))
    if exists is True:
        raise ValueError('An Arakoon cluster with name "{0}" already exists'.format(cluster_name))

    ArakoonInstaller._logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))

    node_name = System.get_my_machine_id(client)
    base_dir = base_dir.rstrip('/')
    home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
    tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
    ArakoonInstaller.clean_leftover_arakoon_data(ip, [home_dir, tlog_dir])

    port_mutex = None
    try:
        if locked is True:
            from ovs.extensions.generic.volatilemutex import volatile_mutex
            port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(ip))
            port_mutex.acquire(wait=60)
        if ports is None:
            ports = ArakoonInstaller._get_free_ports(client)

        plugin_names = plugins.keys() if plugins is not None else None
        config = ArakoonClusterConfig(cluster_name, filesystem, plugin_names)
        first_node = ArakoonNodeConfig(name=node_name,
                                       ip=ip,
                                       client_port=ports[0],
                                       messaging_port=ports[1],
                                       log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                       crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                       home=home_dir,
                                       tlog_dir=tlog_dir)
        config.nodes.append(first_node)
        metadata = {'internal': internal,
                    'cluster_name': cluster_name,
                    'cluster_type': cluster_type.upper(),
                    'in_use': False}
        deploy_result = ArakoonInstaller._deploy(config=config,
                                                 filesystem=filesystem,
                                                 plugins=plugins.values() if plugins is not None else None,
                                                 delay_service_registration=cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG)
        service_metadata = deploy_result[ip]
    finally:
        if port_mutex is not None:
            port_mutex.release()

    ArakoonInstaller._logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
    return {'metadata': metadata,
            'client_port': ports[0],
            'messaging_port': ports[1],
            'service_metadata': service_metadata}
def __init__(self):
    """
    Attaches the 'watcher' logger to the instance; no other state is set up
    """
    watcher_logger = LogHandler.get('extensions', name='watcher')
    self._logger = watcher_logger
def ensure_single(task_name, extra_task_names=None, mode='DEFAULT', global_timeout=300):
    """
    Decorator ensuring a new task cannot be started in case a certain task is
    running, scheduled or reserved.

    Keep also in mind that validation will be executed by the worker itself, so if the task is scheduled on
    a worker currently processing a "duplicate" task, it will only get validated after the first
    one completes, which will result in the fact that the task will execute normally.

    Allowed modes:
     - DEFAULT: De-duplication based on the task's name. If any new task with the same name is scheduled it will be
                discarded
     - DEDUPED: De-duplication based on the task's name and arguments. If a new task with the same name and arguments
                is scheduled while the first one is currently being executed, it will be allowed on the queue (to make
                sure there will be at least one new execution). All subsequent identical tasks will be discarded.
                Tasks with different arguments will be executed in parallel
     - CHAINED: Identical as DEDUPED with the exception that all tasks will be executed in serial.

    :param task_name: Name of the task to ensure its singularity
    :type task_name: String
    :param extra_task_names: Extra tasks to take into account (rejected with a ValueError in CHAINED mode)
    :type extra_task_names: List
    :param mode: Mode of the ensure single. Allowed values: DEFAULT, DEDUPED, CHAINED
    :type mode: String
    :param global_timeout: Timeout before raising error (used in DEDUPED and CHAINED mode; can be overruled per call
                           through the 'ensure_single_timeout' keyword argument, which is popped before the call)
    :type global_timeout: Integer
    :return: Pointer to function
    """
    logger = LogHandler.get('lib', name='ensure single')

    def wrap(function):
        """
        Wrapper function
        :param function: Function to check
        """
        def new_function(*args, **kwargs):
            """
            Wrapped function
            :param args: Arguments without default values
            :param kwargs: Arguments with default values
            """
            def log_message(message, level='info'):
                """
                Log a message with some additional information (mode and the ID of this invocation)
                :param message: Message to log
                :param level:   Log level
                :return:        None
                :raises ValueError: When an unsupported level is specified
                """
                if level not in ('info', 'warning', 'debug', 'error', 'exception'):
                    raise ValueError('Unsupported log level "{0}" specified'.format(level))
                complete_message = 'Ensure single {0} mode - ID {1} - {2}'.format(mode, now, message)
                getattr(logger, level)(complete_message)

            def update_value(key, append, value_to_update=None):
                """
                Store the specified value in the PersistentFactory
                :param key:             Key to store the value for
                :param append:          If True, the specified value will be appended else element at index 0 will be popped
                :param value_to_update: Value to append to the list or remove from the list
                :return:                Updated value
                """
                with volatile_mutex(name=key, wait=5):
                    if persistent_client.exists(key):
                        val = persistent_client.get(key)
                        if append is True and value_to_update is not None:
                            val['values'].append(value_to_update)
                        elif append is False and value_to_update is not None:
                            # Remove only the first entry equal to the given value
                            for value_item in val['values']:
                                if value_item == value_to_update:
                                    val['values'].remove(value_item)
                                    break
                        elif append is False and len(val['values']) > 0:
                            val['values'].pop(0)
                        log_message('Amount of jobs pending for key {0}: {1}'.format(key, len(val['values'])))
                        for kwarg in val['values']:
                            log_message(' KWARGS: {0}'.format(kwarg['kwargs']))
                    else:
                        log_message('Setting initial value for key {0}'.format(key))
                        val = {'mode': mode, 'values': []}
                    persistent_client.set(key, val)
                return val

            # Identifier of this invocation: epoch seconds followed by 10 random alphanumeric characters
            now = '{0}_{1}'.format(int(time.time()), ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)))
            task_names = [task_name] if extra_task_names is None else [task_name] + extra_task_names
            persistent_key = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task_name)
            persistent_client = PersistentFactory.get_client()

            if mode == 'DEFAULT':
                with volatile_mutex(persistent_key, wait=5):
                    for task in task_names:
                        key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                        if persistent_client.exists(key_to_check):
                            log_message('Execution of task {0} discarded'.format(task_name))
                            return None
                    log_message('Setting key {0}'.format(persistent_key))
                    persistent_client.set(persistent_key, {'mode': mode})

                try:
                    output = function(*args, **kwargs)
                    log_message('Task {0} finished successfully'.format(task_name))
                    return output
                finally:
                    # Always remove the marker key, also when the task raised
                    with volatile_mutex(persistent_key, wait=5):
                        if persistent_client.exists(persistent_key):
                            log_message('Deleting key {0}'.format(persistent_key))
                            persistent_client.delete(persistent_key)
            elif mode == 'DEDUPED':
                with volatile_mutex(persistent_key, wait=5):
                    if extra_task_names is not None:
                        for task in extra_task_names:
                            key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                            if persistent_client.exists(key_to_check):
                                log_message('Execution of task {0} discarded'.format(task_name))
                                return None
                    log_message('Setting key {0}'.format(persistent_key))

                # Update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed
                job_counter = 0
                for item in value['values']:
                    if item['kwargs'] == kwargs_dict:
                        job_counter += 1
                        if job_counter == 2:  # 1st job with same params is being executed, 2nd is scheduled for execution ==> Discard current
                            log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                            return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict})

                # Poll the arakoon to see whether this call is the only in list, if so --> execute, else wait
                counter = 0
                while counter < timeout:
                    if persistent_client.exists(persistent_key):
                        values = persistent_client.get(persistent_key)['values']
                        queued_jobs = [v for v in values if v['kwargs'] == kwargs_dict]
                        if len(queued_jobs) == 1:
                            try:
                                if counter != 0:
                                    current_time = int(time.time())
                                    starting_time = int(now.split('_')[0])
                                    log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                         params_info,
                                                                                                                         current_time - starting_time))
                                output = function(*args, **kwargs)
                                log_message('Task {0} finished successfully'.format(task_name))
                                return output
                            finally:
                                update_value(key=persistent_key,
                                             append=False,
                                             value_to_update={'kwargs': kwargs_dict})
                    counter += 1
                    time.sleep(1)
                    if counter == timeout:
                        update_value(key=persistent_key,
                                     append=False,
                                     value_to_update={'kwargs': kwargs_dict})
                        log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                    level='error')
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                         now,
                                                                                                                                                         task_name,
                                                                                                                                                         timeout))
            elif mode == 'CHAINED':
                if extra_task_names is not None:
                    log_message('Extra tasks are not allowed in this mode',
                                level='error')
                    raise ValueError('Ensure single {0} mode - ID {1} - Extra tasks are not allowed in this mode'.format(mode, now))

                # Create key to be stored in arakoon and update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed, skip if so
                for item in value['values'][1:]:  # 1st element is processing job, we check all other queued jobs for identical params
                    if item['kwargs'] == kwargs_dict:
                        log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                        return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict,
                                              'timestamp': now})

                # Poll the arakoon to see whether this call is the first in list, if so --> execute, else wait
                first_element = None
                counter = 0
                while counter < timeout:
                    if persistent_client.exists(persistent_key):
                        value = persistent_client.get(persistent_key)
                        first_element = value['values'][0]['timestamp'] if len(value['values']) > 0 else None

                    if first_element == now:
                        output = None
                        try:
                            if counter != 0:
                                current_time = int(time.time())
                                starting_time = int(now.split('_')[0])
                                log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                     params_info,
                                                                                                                     current_time - starting_time))
                            output = function(*args, **kwargs)
                            log_message('Task {0} finished successfully'.format(task_name))
                        except Exception:
                            log_message('Task {0} {1} failed'.format(task_name, params_info), level='exception')
                            raise
                        finally:
                            # Pops the element at index 0, which is this job at this point
                            update_value(key=persistent_key,
                                         append=False)
                        return output
                    counter += 1
                    time.sleep(1)
                    if counter == timeout:
                        # NOTE(review): without value_to_update this pops the element at index 0, which looks like
                        # the job currently being processed rather than this timed-out job - confirm intent
                        update_value(key=persistent_key,
                                     append=False)
                        log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                    level='error')
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                         now,
                                                                                                                                                         task_name,
                                                                                                                                                         timeout))
            else:
                raise ValueError('Unsupported mode "{0}" provided'.format(mode))

        # Make the wrapper report the name and module of the wrapped function
        new_function.__name__ = function.__name__
        new_function.__module__ = function.__module__
        return new_function
    return wrap
try: for task in return_data['tasks']: self._process_task(task['code'], task['metadata'], self.servicemanager) except Exception, ex: SupportAgent._logger.exception('Unexpected error processing tasks: {0}'.format(ex)) raise if 'interval' in return_data: interval = return_data['interval'] if interval != self.interval: self.interval = interval self._update_config('interval', str(interval)) self.interval = return_data['interval'] if __name__ == '__main__': logger = LogHandler.get('support', name='agent') try: if Configuration.get('/ovs/framework/support|enabled') is False: print 'Support not enabled' sys.exit(0) logger.info('Starting up') client = SupportAgent() while True: try: client.run() time.sleep(client.interval) except KeyboardInterrupt: raise except Exception, exception: logger.exception('Unexpected error during run: {0}'.format(exception)) time.sleep(10)
def execute_scrub_work(queue, vpool, scrub_info, error_messages):
    """
    Executes scrub work for a given vDisk queue and vPool, based on scrub_info
    The work is done in 3 phases: deploy (or re-use) an ALBA proxy service, scrub every vDisk fetched from the
    queue, remove the proxy service again. Failures in any phase are logged and appended to error_messages.
    :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
    :type queue: Queue
    :param vpool: the vPool object of the vDisks
    :type vpool: VPool
    :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                       that needs to do the work
    :type scrub_info: dict
    :param error_messages: A list of error messages to be filled (updated in place; this function has no return statement)
    :type error_messages: list
    :return: None
    """
    def _verify_mds_config(current_vdisk):
        # Reload the 'info' dynamic and return its MDS configuration, failing when it is empty
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    client = None
    lock_time = 5 * 60
    storagerouter = scrub_info['storage_router']
    scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
    scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
    alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

    # Deploy a proxy
    # NOTE(review): when this phase fails, the error is recorded but the scrub phase below still runs - confirm intent
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_create(scrub_directory)
            client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
            if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                scrub_config = Configuration.get(scrub_config_key)
            else:
                machine_id = System.get_my_machine_id(client)
                port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                # Scrub config
                # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                #  u'fragment_cache': [u'none'],
                #  u'ips': [u'127.0.0.1'],
                #  u'log_level': u'info',
                #  u'manifest_cache_size': 17179869184,
                #  u'port': 0,
                #  u'transport': u'tcp'}

                # Backend config
                # {u'alba_connection_host': u'10.100.193.155',
                #  u'alba_connection_port': 26204,
                #  u'alba_connection_preset': u'preset',
                #  u'alba_connection_timeout': 15,
                #  u'alba_connection_transport': u'TCP',
                #  u'backend_interface_retries_on_error': 5,
                #  u'backend_interface_retry_backoff_multiplier': 2.0,
                #  u'backend_interface_retry_interval_secs': 1,
                #  u'backend_type': u'ALBA'}
                scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                scrub_config['port'] = port
                scrub_config['transport'] = 'tcp'
                Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                params = {'VPOOL_NAME': vpool.name,
                          'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                          'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                ServiceManager.start_service(name=alba_proxy_service, client=client)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            # Point the backend configuration used for scrubbing to the local proxy
            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        # Roll back whatever part of the proxy deployment succeeded
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)  # Non-blocking; Empty ends the loop through the handler below
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    # A failure for a single vDisk is recorded; the loop continues with the next guid
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)

    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
# C0111: Missing docstring # W0142: Used * or ** magic # R0912: Too many branches # C0103: Invalid name # W0212: Access to a protected member # R0913: Too many arguments # W0201: Attributed defined outside __init__ # W0231: __init__ method from base class X is not called # R0903: Too few public methods # W0223: Method X is abstract in class Y but not overridden # R0201: Method could be a function # W0703: Catch "Exception" # E1121: Too many positional arguments for function call # R0904: Too many public methods LOGGER = LogHandler.get("arakoon_client", "pyrakoon") class Consistency: pass class Consistent(Consistency): def __str__(self): return "Consistent" class NoGuarantee(Consistency): def __str__(self): return "NoGuarantee"
import argparse parser = argparse.ArgumentParser( description="Rabbitmq Event Processor for OVS", formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument("rabbitmq_queue", type=str, help="Rabbitmq queue name") parser.add_argument( "--durable", dest="queue_durable", action="store_const", default=False, const=True, help="Declare queue as durable", ) logger = LogHandler.get("extensions", name="consumer") args = parser.parse_args() try: run_event_consumer = False my_ip = Configuration.get("/ovs/framework/hosts/{0}/ip".format(System.get_my_machine_id())) for endpoint in Configuration.get("/ovs/framework/messagequeue|endpoints"): if endpoint.startswith(my_ip): run_event_consumer = True if run_event_consumer is True: # Load mapping mapping = {} path = "/".join([os.path.dirname(__file__), "mappings"]) for filename in os.listdir(path): if os.path.isfile("/".join([path, filename])) and filename.endswith(".py"):
def __init__(self):
    """
    Initialize the instance: store the path of the fstab file and attach the 'fstab' logger
    """
    self.fstab_file = '/etc/fstab'
    self._logger = LogHandler.get('extensions', name='fstab')
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. '''Utility functions''' import __builtin__ import uuid import functools import itertools from ovs.log.log_handler import LogHandler LOGGER = LogHandler.get('arakoon_client', 'pyrakoon') '''Logger for code in this module''' #pylint: disable=W0105 def update_argspec(*argnames): #pylint: disable=R0912 '''Wrap a callable to use real argument names When generating functions at runtime, one often needs to fall back to ``*args`` and ``**kwargs`` usage. Using these features require well-documented code though, and renders API documentation tools less useful. The decorator generated by this function wraps a decorated function, which takes ``**kwargs``, into a function which takes the given argument names as parameters, and passes them to the decorated function as keyword arguments.
def __init__(self, node):
    """
    Initialize the client for a given node
    :param node: The node this client is bound to (stored as-is on the instance)
    """
    # Unittest mode is enabled only when the environment variable is exactly the string "True"
    unittest_flag = os.environ.get("RUNNING_UNITTESTS")

    self.node = node
    self.timeout = 20  # NOTE(review): presumably seconds - confirm against the code that consumes it
    self._log_min_duration = 1  # NOTE(review): presumably a threshold for logging call durations - confirm
    self._unittest_mode = unittest_flag == "True"
    self._logger = LogHandler.get("extensions", name="asdmanagerclient")
def run(command, config=None, named_params=None, extra_params=None, client=None, debug=False):
    """
    Executes a command on ALBA
    When --to-json is NOT passed:
        * An error occurs --> exitcode != 0
        * It worked --> exitcode == 0

    When --to-json is passed:
        * An errors occurs during verification of parameters passed -> exitcode != 0
        * An error occurs while executing the command --> exitcode == 0 (error in json output)
        * It worked --> exitcode == 0

    This function always passes --to-json. When the environment variable RUNNING_UNITTESTS equals 'True',
    no process is started; the call is dispatched to the mocked VirtualAlbaBackend instead.

    :param command: The command to execute, eg: 'list-namespaces'
    :type command: str
    :param config: The configuration location to be used, eg: 'arakoon://config/ovs/arakoon/ovsdb/config?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini'
    :type config: str
    :param named_params: Additional parameters to be given to the command, eg: {'long-id': ','.join(asd_ids)}
                         The passed dict is never modified by this function
    :type named_params: dict
    :param extra_params: Additional parameters to be given to the command, eg: [name]
    :type extra_params: list
    :param client: A client on which to execute the command. When None, the command is executed locally
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param debug: Log additional output
    :type debug: bool
    :raises RuntimeError: When the command exits non-zero without parseable json output, or when the json
                          output does not report success (the reported error message is used)
    :return: The 'result' entry of the json output of the command
    :rtype: dict
    """
    if named_params is None:
        named_params = {}
    if extra_params is None:
        extra_params = []

    logger = LogHandler.get('extensions', name='alba-cli')
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        # For the unittest, all commands are passed to a mocked Alba
        from ovs.extensions.plugins.tests.alba_mockups import VirtualAlbaBackend
        # Build the keyword arguments in a fresh dict: updating named_params in place would leak
        # 'config' and 'extra_params' into the dict owned by the caller
        mock_kwargs = dict(named_params)
        mock_kwargs.update({'config': config,
                            'extra_params': extra_params})
        return getattr(VirtualAlbaBackend, command.replace('-', '_'))(**mock_kwargs)

    # Collected while executing; only dumped to the log when something goes wrong
    debug_log = []
    try:
        cmd_list = ['/usr/bin/alba', command, '--to-json']
        if config is not None:
            cmd_list.append('--config={0}'.format(config))
        for key, value in named_params.iteritems():
            cmd_list.append('--{0}={1}'.format(key, value))
        cmd_list.extend(extra_params)
        cmd_string = ' '.join(cmd_list)
        debug_log.append('Command: {0}'.format(cmd_string))

        start = time.time()
        try:
            if client is None:
                # Local execution
                try:
                    if not hasattr(select, 'poll'):
                        import subprocess
                        subprocess._has_poll = False  # Damn 'monkey patching'
                    channel = Popen(cmd_list, stdout=PIPE, stderr=PIPE, universal_newlines=True)
                except OSError as ose:
                    # Surface a failure to start the process the same way as a failing command
                    raise CalledProcessError(1, cmd_string, str(ose))
                output, stderr = channel.communicate()
                # Drop every non-ASCII character from stdout
                output = re.sub(r'[^\x00-\x7F]+', '', output)
                stderr_debug = 'stderr: {0}'.format(stderr)
                stdout_debug = 'stdout: {0}'.format(output)
                if debug is True:
                    logger.debug(stderr_debug)
                    logger.debug(stdout_debug)
                debug_log.append(stderr_debug)
                debug_log.append(stdout_debug)
                exit_code = channel.returncode
                if exit_code != 0:  # Raise same error as check_output
                    raise CalledProcessError(exit_code, cmd_string, output)
            else:
                # Execution through the given client
                if debug is True:
                    output, stderr = client.run(cmd_list, return_stderr=True)
                    debug_log.append('stderr: {0}'.format(stderr))
                else:
                    output = client.run(cmd_list).strip()
                debug_log.append('stdout: {0}'.format(output))

            output = json.loads(output)
            duration = time.time() - start
            if duration > 0.5:
                logger.warning('AlbaCLI call {0} took {1}s'.format(command, round(duration, 2)))
        except CalledProcessError as cpe:
            # A non-zero exit code can still come with json output describing the error
            try:
                output = json.loads(cpe.output)
            except Exception:
                raise RuntimeError('Executing command {0} failed with output {1}'.format(cmd_string, cpe.output))

        if output['success'] is True:
            return output['result']
        raise RuntimeError(output['error']['message'])

    except Exception as ex:
        logger.exception('Error: {0}'.format(ex))
        # In case there's an exception, we always log
        for debug_line in debug_log:
            logger.debug(debug_line)
        raise