def pulse():
    """
    Update the heartbeats for the current StorageRouters.

    For the local node: refresh its own 'process' heartbeat (guarded by a
    volatile mutex) and schedule an async ping task routed to this node.
    For every other node: if its 'process' heartbeat is older than
    HeartBeat.ARP_TIMEOUT, delete that node's entry from the local ARP cache
    so a stale MAC mapping does not linger.
    :return: None
    """
    logger = Logger('extensions-generic')
    machine_id = System.get_my_machine_id()
    current_time = int(time.time())
    routers = StorageRouterList.get_storagerouters()
    for node in routers:
        if node.machine_id == machine_id:
            # Update our own heartbeat under a mutex so concurrent writers
            # cannot clobber the heartbeats dict on this StorageRouter
            with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                node_save = StorageRouter(node.guid)
                node_save.heartbeats['process'] = current_time
                node_save.save()
            StorageRouterController.ping.s(node.guid, current_time).apply_async(
                routing_key='sr.{0}'.format(machine_id))
        else:
            try:
                # Check timeout of other nodes and clear their ARP cache entry
                if node.heartbeats and 'process' in node.heartbeats:
                    if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                        # Pass the command as an argument list instead of a
                        # shell string: no shell is involved, so node.name
                        # needs no quoting/escaping and cannot be interpreted
                        # as shell syntax
                        check_output(['/usr/sbin/arp', '-d', node.name])
            except CalledProcessError:
                logger.exception('Error clearing ARP cache')
def process_exception(self, request, exception):
    """
    Translate an unhandled exception into a JSON HTTP response and log it.

    Own HTTP exceptions are passed through with their embedded status/data,
    missing-field validation errors become a 400, everything else a 500.
    """
    _ = self, request
    logger = Logger('api')

    def _json_response(payload, code):
        # Serialize the payload dict and wrap it in a JSON HTTP response
        return HttpResponse(json.dumps(payload), status=code, content_type='application/json')

    if OVSMiddleware.is_own_httpexception(exception):
        # Our own HTTP exceptions already carry serialized data and a status code
        return HttpResponse(exception.data,
                            status=exception.status_code,
                            content_type='application/json')
    if isinstance(exception, MissingMandatoryFieldsException):
        # Validation failure on the incoming request data
        return _json_response({'error': 'invalid_data',
                               'error_description': exception.message}, 400)
    # Anything else is unexpected: log with traceback and answer with a 500
    logger.exception('An unhandled exception occurred: {0}'.format(exception))
    return _json_response({'error': 'internal_server',
                           'error_description': exception.message}, 500)
def relay(*args, **kwargs):
    """
    Relays any call to another node.
    Assume this example:
    * A user wants to execute a HTTP GET on /api/storagerouters/
    ** /api/<call>
    * He'll have to execute a HTTP GET on /api/relay/<call>
    ** This will translate to /api/relay/storagerouters/
    Parameters:
    * Mandatory: ip, port, client_id, client_secret
    * All other parameters will be passed through to the specified node
    """

    @authenticated()
    @required_roles(['read'])
    @load()
    def _relay(_, ip, port, client_id, client_secret, raw_version, request):
        # NOTE(review): the decorators appear to inject/validate these
        # parameters by name, so the signature of _relay should not be
        # changed lightly
        # Strip the relay prefix: /api/relay/<call> becomes /<call> on the target node
        path = '/{0}'.format(request.path.replace('/api/relay/', ''))
        method = request.META['REQUEST_METHOD'].lower()
        client = OVSClient(ip, port,
                           credentials=(client_id, client_secret),
                           version=raw_version,
                           raw_response=True,
                           cache_store=VolatileFactory.get_client())
        # The OVSClient exposes one method per HTTP verb; reject verbs it lacks
        if not hasattr(client, method):
            raise HttpMethodNotAllowedException(
                error='unavailable_call',
                error_description='Method not available in relay')
        client_kwargs = {'params': request.GET}
        if method != 'get':
            # Non-GET calls also forward the request body
            client_kwargs['data'] = request.POST
        call_response = getattr(client, method)(path, **client_kwargs)
        response = HttpResponse(call_response.text,
                                content_type='application/json',
                                status=call_response.status_code)
        # Copy the remote node's response headers and tag the response with
        # the relay target so callers can see where it was served from
        for header, value in call_response.headers.iteritems():
            response[header] = value
        response['OVS-Relay'] = '{0}:{1}'.format(ip, port)
        return response

    try:
        return _relay(*args, **kwargs)
    except Exception as ex:
        if OVSMiddleware.is_own_httpexception(ex):
            # Our own HTTP exceptions already carry serialized data and a status code
            # noinspection PyUnresolvedReferences
            return HttpResponse(ex.data,
                                status=ex.status_code,
                                content_type='application/json')
        # Map any other error onto a JSON 'relay_error' response, preferring
        # the exception's own detail/status_code attributes when present
        message = str(ex)
        status_code = 400
        if hasattr(ex, 'detail'):
            message = ex.detail
        if hasattr(ex, 'status_code'):
            status_code = ex.status_code
        logger = Logger('api')
        logger.exception('Error relaying call: {0}'.format(message))
        return HttpResponse(json.dumps({'error_description': message,
                                        'error': 'relay_error'}),
                            content_type='application/json',
                            status=status_code)
class DistributedScheduler(Scheduler):
    """
    Distributed scheduler that can run on multiple nodes at the same time.

    Exactly one node at a time holds a lock record in the persistent store;
    only the lock holder executes the scheduled workload in tick(). A lock
    older than TIMEOUT seconds is considered stale and may be stolen.
    """

    # Seconds after which another node may steal a stale lock (30 minutes)
    TIMEOUT = 60 * 30

    def __init__(self, *args, **kwargs):
        """
        Initializes the distributed scheduler
        """
        self._mutex = volatile_mutex('celery_beat', 10)
        self._logger = Logger('celery')
        self._has_lock = False                         # True while this node holds the distributed lock
        self._lock_name = 'ovs_celery_beat_lock'       # Persistent-store key for the lock record
        self._entry_name = 'ovs_celery_beat_entries'   # Persistent-store key for the pickled schedule
        self._persistent = PersistentFactory.get_client()
        self._schedule_info = {}                       # Maps schedule/task name -> source description for logging
        super(DistributedScheduler, self).__init__(*args, **kwargs)
        self._logger.debug('DS init')

    def setup_schedule(self):
        """
        Setups the schedule: load the persisted entries, merge in the
        schedules discovered from the task modules and log the result.
        """
        self._logger.debug('DS setting up schedule')
        self._load_schedule()
        self.merge_inplace(self._discover_schedule())
        self.install_default_entries(self.schedule)
        for schedule, source in self._schedule_info.iteritems():
            self._logger.debug('* {0} ({1})'.format(schedule, source))
        self._logger.debug('DS setting up schedule - done')

    def _discover_schedule(self):
        """
        Scans the .py files in the sibling 'lib' directory for classes whose
        members carry a 'schedule' attribute of type Schedule, and builds the
        celery schedule entries from them.
        :return: Discovered schedule entries, keyed by task name
        :rtype: dict
        """
        schedules = {}
        self._schedule_info = {}
        path = '/'.join([os.path.dirname(__file__), 'lib'])
        for filename in os.listdir(path):
            if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py') and filename != '__init__.py':
                name = filename.replace('.py', '')
                # Import the module by file path so decorated tasks are inspectable
                mod = imp.load_source(name, '/'.join([path, filename]))
                for member in inspect.getmembers(mod, predicate=inspect.isclass):
                    # Only consider classes defined in this module itself, not imports
                    if member[1].__module__ == name:
                        for submember in inspect.getmembers(member[1]):
                            if hasattr(submember[1], 'schedule') and isinstance(submember[1].schedule, Schedule):
                                schedule, source = submember[1].schedule.generate_schedule(submember[1].name)
                                if schedule is not None:
                                    schedules[submember[1].name] = {'task': submember[1].name,
                                                                    'schedule': schedule,
                                                                    'args': []}
                                self._schedule_info[submember[1].name] = source
        return schedules

    def _load_schedule(self):
        """
        Loads the most recent schedule from the persistent store
        """
        self.schedule = {}
        try:
            self._logger.debug('DS loading schedule entries')
            self._mutex.acquire(wait=10)
            try:
                self.schedule = cPickle.loads(str(self._persistent.get(self._entry_name)))
            except:
                # In case an exception occurs during loading the schedule, it is ignored and the default schedule
                # will be used/restored.
                pass
        finally:
            self._mutex.release()

    def sync(self):
        """
        Persists the current schedule entries, but only when this node holds
        the distributed lock (other nodes must not overwrite the holder's state).
        """
        if self._has_lock is True:
            try:
                self._logger.debug('DS syncing schedule entries')
                self._mutex.acquire(wait=10)
                self._persistent.set(key=self._entry_name,
                                     value=cPickle.dumps(self.schedule))
            except ArakoonSockNotReadable:
                # Best effort: a failed sync is retried on a next iteration
                self._logger.exception('Syncing the schedule failed this iteration')
            finally:
                self._mutex.release()
        else:
            self._logger.debug('DS skipping sync: lock is not ours')

    def tick(self):
        """
        Runs one iteration of the scheduler. This is guarded with a distributed lock
        """
        self._logger.debug('DS executing tick')
        try:
            self._has_lock = False
            with self._mutex:
                # noinspection PyProtectedMember
                node_now = current_app._get_current_object().now()
                node_timestamp = time.mktime(node_now.timetuple())
                node_name = System.get_my_machine_id()
                try:
                    lock = self._persistent.get(self._lock_name)
                except KeyNotFoundException:
                    lock = None
                if lock is None:
                    # There is no lock yet, so the lock is acquired
                    self._has_lock = True
                    self._logger.debug('DS there was no lock in tick')
                else:
                    if lock['name'] == node_name:
                        # The current node holds the lock
                        self._logger.debug('DS keeps own lock')
                        self._has_lock = True
                    elif node_timestamp - lock['timestamp'] > DistributedScheduler.TIMEOUT:
                        # The current lock is timed out, so the lock is stolen
                        self._logger.debug('DS last lock refresh is {0}s old'.format(
                            node_timestamp - lock['timestamp']))
                        self._logger.debug('DS stealing lock from {0}'.format(lock['name']))
                        # Reload the schedule the previous holder persisted before taking over
                        self._load_schedule()
                        self._has_lock = True
                    else:
                        self._logger.debug('DS lock is not ours')
                if self._has_lock is True:
                    # Refresh the lock's timestamp so others see it is alive
                    lock = {'name': node_name, 'timestamp': node_timestamp}
                    self._logger.debug('DS refreshing lock')
                    self._persistent.set(self._lock_name, lock)
            if self._has_lock is True:
                self._logger.debug('DS executing tick workload')
                remaining_times = []
                try:
                    for entry in self.schedule.itervalues():
                        next_time_to_run = self.maybe_due(entry, self.publisher)
                        if next_time_to_run:
                            remaining_times.append(next_time_to_run)
                except RuntimeError:
                    # NOTE(review): presumably raised when the schedule dict
                    # changes size during iteration — the remainder of this
                    # tick is skipped; verify against celery's Scheduler
                    pass
                self._logger.debug('DS executing tick workload - done')
                # Sleep until the earliest upcoming entry, capped at max_interval
                return min(remaining_times + [self.max_interval])
            else:
                return self.max_interval
        except Exception as ex:
            # Never let a tick crash the beat loop; retry after max_interval
            self._logger.debug('DS got error during tick: {0}'.format(ex))
            return self.max_interval
def run(command, config=None, named_params=None, extra_params=None, client=None, debug=False, to_json=True):
    """
    Executes a command on ALBA
    When --to-json is NOT passed:
    * An error occurs --> exitcode != 0
    * It worked --> exitcode == 0

    When --to-json is passed:
    * An errors occurs during verification of parameters passed -> exitcode != 0
    * An error occurs while executing the command --> exitcode == 0 (error in json output)
    * It worked --> exitcode == 0

    :param command: The command to execute, eg: 'list-namespaces'
    :type command: str
    :param config: The configuration location to be used, eg: 'arakoon://config/ovs/arakoon/ovsdb/config?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini'
    :type config: str
    :param named_params: Additional parameters to be given to the command, eg: {'long-id': ','.join(asd_ids)}
    :type named_params: dict
    :param extra_params: Additional parameters to be given to the command, eg: [name]
    :type extra_params: list
    :param client: A client on which to execute the command
    :type client: ovs_extensions.generic.sshclient.SSHClient
    :param debug: Log additional output
    :type debug: bool
    :param to_json: Request a JSON response from Alba
    :type to_json: bool
    :return: The output of the command
    :rtype: dict
    """
    # Avoid mutable default arguments; fresh containers per call
    if named_params is None:
        named_params = {}
    if extra_params is None:
        extra_params = []
    logger = Logger('extensions-plugins')
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        # For the unittest, all commands are passed to a mocked Alba
        from ovs.extensions.plugins.tests.alba_mockups import VirtualAlbaBackend
        named_params.update({'config': config})
        named_params.update({'extra_params': extra_params})
        return getattr(VirtualAlbaBackend, command.replace('-', '_'))(**named_params)
    # Collects command/stdout/stderr lines; only flushed to the log on failure
    debug_log = []
    try:
        if to_json is True:
            extra_options = ["--to-json"]
        else:
            extra_options = []
        # Assemble the full CLI invocation: alba <command> [--to-json] [--config=...] [--key=value ...] [extra...]
        cmd_list = ['/usr/bin/alba', command] + extra_options
        if config is not None:
            cmd_list.append('--config={0}'.format(config))
        for key, value in named_params.iteritems():
            cmd_list.append('--{0}={1}'.format(key, value))
        cmd_list.extend(extra_params)
        cmd_string = ' '.join(cmd_list)
        debug_log.append('Command: {0}'.format(cmd_string))
        start = time.time()
        try:
            if debug is True:
                logger.debug('Command: {0}'.format(cmd_string))
            if client is None:
                # Local execution through subprocess
                try:
                    if not hasattr(select, 'poll'):
                        import subprocess
                        subprocess._has_poll = False  # Damn 'monkey patching': force subprocess off poll() when select lacks it
                    channel = Popen(cmd_list, stdout=PIPE, stderr=PIPE, universal_newlines=True)
                except OSError as ose:
                    # Normalize a missing/unlaunchable binary into the same error type as a failed run
                    raise CalledProcessError(1, cmd_string, str(ose))
                output, stderr = channel.communicate()
                # Strip non-ASCII characters from stdout before further processing
                output = re.sub(r'[^\x00-\x7F]+', '', output)
                stderr_debug = 'stderr: {0}'.format(stderr)
                stdout_debug = 'stdout: {0}'.format(output)
                if debug is True:
                    logger.debug(stderr_debug)
                    logger.debug(stdout_debug)
                debug_log.append(stderr_debug)
                debug_log.append(stdout_debug)
                exit_code = channel.returncode
                if exit_code != 0:  # Raise same error as check_output
                    raise CalledProcessError(exit_code, cmd_string, output)
            else:
                # Remote execution through the provided SSH client
                if debug is True:
                    output, stderr = client.run(cmd_list, debug=True, return_stderr=True)
                    debug_log.append('stderr: {0}'.format(stderr))
                else:
                    output = client.run(cmd_list).strip()
                debug_log.append('stdout: {0}'.format(output))
            if to_json is True:
                output = json.loads(output)
            else:
                # Raw mode: return stdout as-is (note: skips the slow-call warning below)
                return output
            duration = time.time() - start
            if duration > 0.5:
                logger.warning('AlbaCLI call {0} took {1}s'.format(command, round(duration, 2)))
        except CalledProcessError as cpe:
            # With --to-json, Alba reports errors as JSON on a non-zero exit;
            # fall through to the success/error unwrapping below
            try:
                output = json.loads(cpe.output)
            except Exception:
                raise RuntimeError('Executing command {0} failed with output {1}'.format(cmd_string, cpe.output))
        # Unwrap the JSON envelope: {'success': bool, 'result': ...} or {'error': {...}}
        if output['success'] is True:
            return output['result']
        raise AlbaError(output['error']['message'],
                        output['error']['exception_code'],
                        output['error']['exception_type'])
    except Exception as ex:
        logger.exception('Error: {0}'.format(ex))
        # In case there's an exception, we always log
        for debug_line in debug_log:
            logger.debug(debug_line)
        raise