def pulse(): """ Update the heartbeats for all Storage Routers :return: None """ logger = LogHandler.get('extensions', name='heartbeat') current_time = int(time.time()) machine_id = System.get_my_machine_id() amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'), EtcdConfiguration.get('/ovs/framework/messagequeue|user'), EtcdConfiguration.get('/ovs/framework/messagequeue|password'), EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))) celery_path = OSManager.get_path('celery') worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True) routers = StorageRouterList.get_storagerouters() for node in routers: if node.heartbeats is None: node.heartbeats = {} if 'celery@{0}: OK'.format(node.name) in worker_states: node.heartbeats['celery'] = current_time if node.machine_id == machine_id: node.heartbeats['process'] = current_time else: try: # check timeout of other nodes and clear arp cache if node.heartbeats and 'process' in node.heartbeats: if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT: check_output("/usr/sbin/arp -d {0}".format(node.name), shell=True) except CalledProcessError: logger.exception('Error clearing ARP cache') node.save()
def pulse(): """ Update the heartbeats for all Storage Routers :return: None """ logger = LogHandler.get('extensions', name='heartbeat') current_time = int(time.time()) machine_id = System.get_my_machine_id() amqp = '{0}://{1}:{2}@{3}//'.format( EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'), EtcdConfiguration.get('/ovs/framework/messagequeue|user'), EtcdConfiguration.get('/ovs/framework/messagequeue|password'), EtcdConfiguration.get( '/ovs/framework/hosts/{0}/ip'.format(machine_id))) celery_path = OSManager.get_path('celery') worker_states = check_output( "{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true" .format(celery_path, amqp), shell=True) routers = StorageRouterList.get_storagerouters() for node in routers: if node.heartbeats is None: node.heartbeats = {} if 'celery@{0}: OK'.format(node.name) in worker_states: node.heartbeats['celery'] = current_time if node.machine_id == machine_id: node.heartbeats['process'] = current_time else: try: # check timeout of other nodes and clear arp cache if node.heartbeats and 'process' in node.heartbeats: if current_time - node.heartbeats[ 'process'] >= HeartBeat.ARP_TIMEOUT: check_output("/usr/sbin/arp -d {0}".format( node.name), shell=True) except CalledProcessError: logger.exception('Error clearing ARP cache') node.save()
from subprocess import check_output from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.generic.system import System from ovs.extensions.generic.configuration import Configuration from ovs.extensions.os.os import OSManager ARP_TIMEOUT = 30 current_time = int(time.time()) machine_id = System.get_my_machine_id() amqp = '{0}://{1}:{2}@{3}//'.format( Configuration.get('ovs.core.broker.protocol'), Configuration.get('ovs.core.broker.login'), Configuration.get('ovs.core.broker.password'), Configuration.get('ovs.grid.ip')) celery_path = OSManager.get_path('celery') worker_states = check_output( "{0} inspect ping -b {1} 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true" .format(celery_path, amqp), shell=True) routers = StorageRouterList.get_storagerouters() for node in routers: if node.heartbeats is None: node.heartbeats = {} if 'celery@{0}: OK'.format(node.name) in worker_states: node.heartbeats['celery'] = current_time if node.machine_id == machine_id: node.heartbeats['process'] = current_time else: # check timeout of other nodes and clear arp cache if node.heartbeats and 'process' in node.heartbeats:
import time from subprocess import check_output from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.generic.system import System from ovs.extensions.generic.configuration import Configuration from ovs.extensions.os.os import OSManager ARP_TIMEOUT = 30 current_time = int(time.time()) machine_id = System.get_my_machine_id() amqp = '{0}://{1}:{2}@{3}//'.format(Configuration.get('ovs.core.broker.protocol'), Configuration.get('ovs.core.broker.login'), Configuration.get('ovs.core.broker.password'), Configuration.get('ovs.grid.ip')) celery_path = OSManager.get_path('celery') worker_states = check_output("{0} inspect ping -b {1} 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True) routers = StorageRouterList.get_storagerouters() for node in routers: if node.heartbeats is None: node.heartbeats = {} if 'celery@{0}: OK'.format(node.name) in worker_states: node.heartbeats['celery'] = current_time if node.machine_id == machine_id: node.heartbeats['process'] = current_time else: # check timeout of other nodes and clear arp cache if node.heartbeats and 'process' in node.heartbeats: if current_time - node.heartbeats['process'] >= ARP_TIMEOUT: check_output("/usr/sbin/arp -d {0}".format(node.name), shell=True) node.save()