def initialization():
    """Build the Policies submodules: Area Resilience, Agent Start and Light Discovery."""
    global arearesilience, agentstart, lightdiscovery
    # 0. Grace period before doing anything
    LOG.info('INIT: Wait {:.2f}s to start'.format(CPARAMS.TIME_WAIT_INIT))
    sleep(CPARAMS.TIME_WAIT_INIT)
    LOG.debug('INIT: Wake Me up Before You Go-Go ♫')
    # 1. Area Resilience submodule
    LOG.debug('Area Resilience submodule creation')
    arearesilience = AreaResilience(cimi, policiesdistribution.LPP)
    LOG.debug('Area Resilience created')
    # 2. Leader Reelection Module Creation (None)
    # 3. Agent Start submodule (full addressing only in mF2C mode)
    LOG.debug('Agent Start submodule creation')
    if CPARAMS.MF2C_FLAG:
        agentstart = AgentStart(addr_pol=('127.0.0.1', '46050'),
                                addr_dis=('discovery', '46040'),
                                addr_cat=('resource-categorization', '46070'),
                                addr_id=('identification', '46060'))
    else:
        agentstart = AgentStart(addr_pol=('127.0.0.1', '46050'))
    agentstart.deviceID = CPARAMS.DEVICEID_FLAG
    leader_ip = CPARAMS.LEADER_IP_FLAG
    if leader_ip is not None and len(leader_ip) != 0:
        agentstart.leaderIP = leader_ip
    LOG.debug('Agent Start created')
    # 4. Light Discovery submodule
    LOG.debug('Light Discovery submodule creation')
    lightdiscovery = LightDiscovery(CPARAMS.BROADCAST_ADDR_FLAG, CPARAMS.DEVICEID_FLAG)
    LOG.debug('Light discovery created')
    return
def __becomeLeader(self):  # TODO
    """Promote this agent to Leader and launch the backup keeper thread."""
    # 1- Shutdown/Notify all the modules involving the Agent -> Leader transiction.
    if self._leaderFailed:
        # Triggers are only required when the previous Leader actually failed.
        try:
            url = URLS.build_url_address(
                '{}leader'.format(URLS.URL_POLICIES_ROLECHANGE),
                portaddr=('127.0.0.1', '46050'))  # TODO Addr+Prt by CPARAMS; Parametrize
            r = requests.get(url)
            LOG.info(self.TAG + 'Trigger to AgentStart Switch done. {}'.format(r.json()))
            self._imLeader = True
            self._imBackup = False
        except Exception:
            LOG.exception(self.TAG + '_becomeLeader trigger to AgentStart failed')
    # Keeper thread watches the backups' keepalives from now on.
    self.th_keep = threading.Thread(name='ar_keeper', target=self.__keeper, daemon=True)
    self.th_keep.start()
def stop(self):
    """
    Stop all the module activity
    :return:
    """
    if not self.isStarted:
        LOG.info(self.TAG + 'Module is not started')
        return
    self._connected = False
    # Wait for both worker threads to wind down (different poll intervals kept).
    for worker, interval in ((self.th_proc, 0.5), (self.th_keep, 0.1)):
        if worker is None:
            continue
        while worker.is_alive():
            LOG.debug(self.TAG + 'Waiting {} to resume activity...'.format(worker.name))
            sleep(interval)
    LOG.info(self.TAG + 'All threads stoped. AreaResilience module is stopped.')
    return
def __print_summary(self):
    """Log a boxed, human-readable summary of the module state (from self.summary())."""
    # Build via list + join instead of repeated string concatenation,
    # and iterate items() instead of keys()+get() (single lookup per entry).
    parts = ["\n######################## FCJP ##########################\n"]
    for item, value in self.summary().items():
        parts.append('\t[\"{}\"] : {}\n'.format(item, value))
    parts.append("########################################################\n")
    LOG.info(''.join(parts))
def switch(self, imLeader):
    """Stop the running flow thread and restart it in the opposite role (Leader/Agent)."""
    if not self.isStarted:
        LOG.error('Agent is not started!')
        return False
    if self.th_proc.is_alive():
        # Ask the current flow to stop and wait for it.
        LOG.debug(self.TAG + 'Stopping thread {} to switch...'.format(self.th_proc.name))
        self._connected = False
        self.th_proc.join()
        LOG.debug('Thread successfully stopped.')
    self._connected = True
    if self.imLeader != imLeader:
        LOG.warning('imLeader state is not consequent!')  # TODO: Action required
    if self.imLeader:
        # Leader -> Agent
        LOG.info(self.TAG + 'Switch to Agent')
        self.imLeader = False
        thread_name, flow = 'fcjp_agent', self.__agent_switch_flow
    else:
        # Agent -> Leader
        LOG.info(self.TAG + 'Switch to Leader')
        # TODO: Create a thread if we don't want blocking feature (AR wait until leader is set - slow)
        self.imLeader = True
        thread_name, flow = 'fcjp_leader', self.__leader_switch_flow
    self.th_proc = threading.Thread(name=thread_name, target=flow, daemon=True)
    self.th_proc.start()
    return True
def receivePolicies(self, payload):
    """
    Store the policy set received from the Leader.

    :param payload: mapping of policy name -> policy JSON payload
    :return: True (always; unknown policy names are silently ignored)
    """
    # Iterate items() (single lookup) and test membership on the dict directly.
    for key, value in payload.items():
        if key in self.__POLICIES:
            self.__POLICIES[key].set_json(value)
    LOG.info('Policies Received from Leader.')
    for policy, entry in self.__POLICIES.items():
        LOG.debug('[{}] - {}'.format(policy, entry.get_json()))
    return True
def startScanning(self):
    """Start LDiscovery in Scan Mode (listen for Leader beacons)."""
    if self._isStarted or self._isScanning:
        LOG.warning('LDiscovery is already started: isStarted={} isScanning={}'.format(
            self._isStarted, self._isScanning))
        return False
    scan_worker = threading.Thread(name='LDiscS', target=self.__scanning_flow, daemon=True)
    self._th_proc = scan_worker
    self._connected = True
    self._isStarted = True
    self._isScanning = True
    # No known leader until a beacon arrives.
    self.leaderIP = None
    self.leaderID = None
    scan_worker.start()
    LOG.info('LDiscovery successfully started in Scan Mode.')
    return True
def start(self, deviceID):  # TODO: Give deviceID at startup?
    """
    Launch the Area Resilience common flow in a daemon thread.
    :return: True on start, False if already started.
    """
    self._deviceID = deviceID
    if self.isStarted:
        LOG.warning(self.TAG + 'Procedure is already started...')
        return False
    worker = threading.Thread(name='area_res', target=self.__common_flow, daemon=True)
    self.th_proc = worker
    worker.start()
    self.isStarted = True
    LOG.info(self.TAG + 'Module Started')
    return True
def addBackup(self, deviceID, deviceIP, priority):
    """
    Register a new backup device, sending it the election message first.

    :param deviceID: identifier of the candidate backup
    :param deviceIP: address of the candidate backup
    :param priority: priority assigned to the new backup
    :return: True if the election message was accepted and the backup stored,
             False otherwise (including when the backup already exists).
    """
    # FIX: `correct` was previously only assigned inside the `if not found`
    # branch, so returning it raised NameError when the backup already existed.
    correct = False
    found = False
    with self.backupDatabaseLock:
        for backup in self.backupDatabase:
            if backup.deviceID == deviceID:
                LOG.debug(self.TAG + 'Backup {} found!'.format(deviceID))
                found = True
                break
    if not found:
        correct = self.__send_election_message(deviceIP)
        if correct:
            new_backup = BackupEntry(deviceID, deviceIP, priority)
            with self.backupDatabaseLock:
                self.backupDatabase.append(new_backup)
            LOG.info('Backup {}[{}] added with priority {}'.format(deviceID, deviceIP, priority))
    return correct
def stopScanning(self):
    """Stop the scan-mode worker: close the socket, join the thread, reset state."""
    if not (self._isStarted and self._isScanning):
        LOG.warning('LDisc is not Scanning.')
        return True
    self._connected = False
    try:
        # Shut down the socket to unblock the worker's recvfrom().
        self._socket.shutdown(socket.SHUT_RDWR)
        self._socket.close()
        LOG.debug('Socket closed on scanning')
    except:
        pass
    self._th_proc.join()
    LOG.info('LDisc Scanning Stopped')
    self._isScanning = False
    self._isStarted = False
    self.leaderIP = None
    self.leaderID = None
    return True
def startBeaconning(self):
    """Start LDiscovery in Beacon Mode (this device broadcasts as Leader)."""
    if self._isStarted or self._isBroadcasting:
        LOG.warning('LDiscovery is already started: isStarted={} isBroadcasting={}'.format(
            self._isStarted, self._isBroadcasting))
        return False
    beacon_worker = threading.Thread(name='LDiscB', target=self.__beaconning_flow, daemon=True)
    self._th_proc = beacon_worker
    self._connected = True
    self._isStarted = True
    self._isBroadcasting = True
    # In beacon mode this device is the leader; reset the replies database.
    self.leaderIP = None
    self.leaderID = self._deviceID
    self._db = {}
    beacon_worker.start()
    LOG.info('LDiscovery successfully started in Beacon Mode.')
    return True
def __backupLeader_flow(self):
    # Flow run when this agent is promoted: become a backup first (unless it is
    # already the leader), then take over leadership if the leader failed.
    if not self._connected:
        LOG.error('Module stoped due _connected = False')
        return
    if not self._imLeader:
        # I've been promoted as backup
        LOG.info(self.TAG + 'I\'m selected to be a backup. Seting up')
        self.__preBackupSetup()
        self.__becomeBackup()   # blocks while the leader is alive
        if not self._connected:
            return
        # Multiple backups support: lower-priority backups wait before taking
        # over, so a more-preferred backup has time to become the new leader.
        if self._backupPriority > 0:
            sleep_time = 1. + 10 * (self._backupPriority - 1)
            LOG.info('Waiting {}s before leader takeover...'.format(sleep_time))
            sleep(sleep_time)
            if not self._connected:
                return
            LOG.debug('Checking if new Leader is up...')
            # Query discovery (via CIMI) for the currently announced leader IP.
            new_leader = self.__getCIMIData('disc_leaderIP', default='')
            LOG.debug('Stored Leader = [{}], Detected Leader = [{}]'.format(self._leaderIP, new_leader))
            if new_leader == '' or new_leader == self._leaderIP:
                # No new leader announced (or still the failed one): proceed to takeover.
                LOG.warning('Leader not detected by Discovery')
            elif self._leaderIP != new_leader:
                # Another (more preferred) backup already took over: demote self
                # back to plain agent via the local role-change endpoint and stop.
                LOG.info('Correct Leader takeover by a backup with more preference.')
                try:
                    # TODO: Clean solution
                    r = requests.get('{}agent'.format(
                        URLS.build_url_address(URLS.URL_POLICIES_ROLECHANGE,
                                               addr='127.0.0.1',
                                               port=CPARAMS.POLICIES_PORT)),
                        timeout=.5)
                except:
                    pass
                finally:
                    return
    if not self._connected:
        return
    if self._imLeader or self._leaderFailed:
        # I'm a leader: start leading and elect backups.
        LOG.info(self.TAG + 'Leader seting up')
        self.__becomeLeader()
        self.__backupSelection()
    return
def initialization():
    """Build the Policies submodules (Area Resilience, Agent Start), resolving Discovery's IP from the default route."""
    global arearesilience, agentstart
    # 0. Grace period before doing anything
    LOG.info('INIT: Wait {:.2f}s to start'.format(CPARAMS.TIME_WAIT_INIT))
    sleep(CPARAMS.TIME_WAIT_INIT)
    LOG.debug('INIT: Wake Me up Before You Go-Go ♫')
    # 1. Area Resilience submodule
    LOG.debug('Area Resilience submodule creation')
    arearesilience = AreaResilience(cimi)
    LOG.debug('Area Resilience created')
    # 2. Leader Reelection Module Creation (None)
    # 3.1 Discovery IP adquisition: take the default gateway from `ip route`
    result = subprocess.run(['/bin/ip', 'route'], stdout=subprocess.PIPE)
    server_ip = ''
    for route_line in bytes(result.stdout).decode().split('\n'):
        if 'default' in route_line:
            server_ip = route_line.split(' ')[2]
            break
    if server_ip == '':
        LOG.error('Discovery IP cannot be received. Stopping.')
        exit(4)
    # 3. Agent Start submodule
    LOG.debug('Agent Start submodule creation')
    if CPARAMS.MF2C_FLAG:
        agentstart = AgentStart(addr_pol=('127.0.0.1', '46050'),
                                addr_dis=('{}'.format(server_ip), '46040'),
                                addr_cat=('resource-categorization', '46070'),
                                addr_id=('identification', '46060'))
    else:
        agentstart = AgentStart(addr_pol=('127.0.0.1', '46050'))
    leader_ip = CPARAMS.LEADER_IP_FLAG
    if leader_ip is not None and len(leader_ip) != 0:
        agentstart.leaderIP = leader_ip
    LOG.debug('Agent Start created')
    return
def __backupLeader_flow(self):
    """Run the backup phase and, if the leader fails (or we already lead), take over leadership."""
    if not self._connected:
        LOG.error('Module stoped due _connected = False')
        return
    if not self._imLeader:
        # Promoted as backup: set up and block while the leader is alive.
        LOG.info(self.TAG + "I'm selected to be a backup. Seting up")
        self.__preBackupSetup()
        self.__becomeBackup()
    if not self._connected:
        return
    if self._imLeader or self._leaderFailed:
        # Become the leader and start electing backups.
        LOG.info(self.TAG + 'Leader seting up')
        self.__becomeLeader()
        self.__backupSelection()
    return
def promotedToBackup(self, leaderIP):
    """
    The agent is promoted to be a backup
    :return:
    """
    # First check if Agent was electable
    self._leaderIP = leaderIP
    if not self._imCapable:
        if not self._startupCorrect:
            LOG.warning('Area Resilience still starting. Cannot promote on this state.')
        else:
            LOG.error('Agent not capable to be Backup/Leader')
        return False
    LOG.info(self.TAG + 'Becoming backup due leader selection.')
    # If the Area Resilience thread is still running, decide what that means.
    current = self.th_proc
    if current is not None and current.is_alive():
        if self._imLeader or self._imBackup:
            LOG.error('Agent is already a Backup/Leader. Cannot become a Backup.')
            return False
        LOG.warning('Area Resilience still starting. Cannot promote on this state. Waiting...')
        while current.is_alive():
            sleep(0.1)
        LOG.debug('Successful waiting.')
    LOG.debug('Module is ready for promotion.')
    self.th_proc = threading.Thread(name='area_res', target=self.__backupLeader_flow, daemon=True)
    self.th_proc.start()
    self.isStarted = True
    return True
def __scanning_flow(self):
    # Scan-mode worker loop:
    # 1. Get Beacon
    # 2. Categorize
    # 3. Send Categorization info
    self._socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # SO_REUSEADDR so a quick restart can rebind the same port.
    self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        self._socket.bind(('0.0.0.0', CPARAMS.LDISCOVERY_PORT))
        LOG.info('Scan server created correctly')
    except:
        LOG.exception('Error on creating the scan receiver')
        return
    while self._connected:
        try:
            # Block until a beacon datagram arrives (stopScanning unblocks us
            # by shutting the socket down).
            data, addr = self._socket.recvfrom(4096)
            if not self._connected:
                break
            LOG.debug('Received beacon from [{}]: \"{}\"'.format(addr[0], data.decode()))
            # The beacon's source address is the leader.
            self.leaderIP = addr[0]
            try:
                ddata = loads(data.decode())
                self.leaderID = ddata.get('leaderID')
            except JSONDecodeError:
                # Keep the IP even if the payload is unreadable.
                LOG.warning('Beacon payload malformed')
            # Categorize this device and report back to the leader.
            cpu, mem, stg = self.__categorize_device()
            LOG.debug('CPU: {}, MEM: {}, STG: {}'.format(cpu, mem, stg))
            payload = DeviceInformation(deviceID=self._deviceID, cpuCores=cpu,
                                        memAvail=mem, stgAvail=stg).getDict()
            LOG.info('Sending beacon reply to Leader...')
            r = requests.post(URLS.build_url_address(
                URLS.URL_BEACONREPLY,
                portaddr=(addr[0], CPARAMS.POLICIES_PORT)),
                json=payload, timeout=2)
            if r.status_code == 200:
                LOG.info('Discovery Message successfully sent to Leader')
            else:
                LOG.warning('Discovery Message received error status code {}'.format(r.status_code))
        except:
            # Never let a single bad datagram kill the loop.
            LOG.exception('Error on beacon received')
    try:
        self._socket.close()
        LOG.info('Scan Server Stopped')
    except:
        LOG.exception('Server Stop not successful')
def receive_keepalive(self, deviceID, deviceIP):
    """Refresh a backup's TTL on keepalive; match by deviceID, or by deviceIP (updating the stored ID)."""
    with self.backupDatabaseLock:
        for entry in self.backupDatabase:
            id_match = entry.deviceID == deviceID
            if id_match or entry.deviceIP == deviceIP:
                if not id_match:
                    # Same address, new identifier: refresh the stored deviceID.
                    LOG.info('Updating deviceID {} for backup [{}]'.format(deviceID, deviceIP))
                    entry.deviceID = deviceID
                entry.TTL = BackupEntry.MAX_TTL
                LOG.debug('backupID: {}; backupIP: {}; priority: {}; Keepalive received correctly'.format(
                    entry.deviceID, entry.deviceIP, entry.priority))
                return True, entry.priority
    # Unknown sender: reject and report the demotion priority.
    return False, self.PRIORITY_ON_DEMOTION
def __common_flow(self):
    """Startup flow: detect role; capable non-leaders prepare for backup selection, leaders enter the leader flow."""
    self._connected = True
    if not self.__imLeader():
        # Not a Leader at startup.
        LOG.info(self.TAG + "I'm not a Leader.")
        if self.__imCapable():
            # Can act as backup: prepare and wait to be selected.
            LOG.info(self.TAG + "I'm capable to be Leader.")
            self._imCapable = True
            self.__preSelectionSetup()
            LOG.info(self.TAG + 'Waiting to be selected.')
        else:
            LOG.info(self.TAG + "I'm NOT capable to be Leader.")
    self._startupCorrect = True
    if self._imLeader:
        # Starting directly as Leader.
        self.__backupLeader_flow()
    return
def reelection(arearesilience, deviceID, deviceIP):
    """Force `deviceID` to become the next leader: promote it (or bump its priority), demote all other backups, then demote self."""
    # 0. Look the device up in the current backup database.
    backups = arearesilience.getBackupDatabase()
    LOG.debug('Backup database query: {}'.format(backups))
    active_backup = None
    for candidate in backups:
        if candidate.deviceID == deviceID:
            active_backup = candidate
            break
    if active_backup is None:
        LOG.debug('Device {} is not an active backup.'.format(deviceID))
        # 1. Promote the device to backup with reelection priority.
        if not arearesilience.addBackup(deviceID, deviceIP, arearesilience.PRIORITY_ON_REELECTION):
            LOG.error('Proposed device cannot be promoted to backup for reelection')
            return False
    else:
        LOG.info('Device {} is an active backup.'.format(deviceID))
        # 2. Give the chosen backup top preference.
        active_backup.priority = arearesilience.PRIORITY_ON_REELECTION
    # 3. Demote every other backup.
    for candidate in arearesilience.getBackupDatabase():
        if candidate.deviceID != deviceID:
            if arearesilience.deleteBackup(candidate.deviceID):
                LOG.info('Backup {}[{}] demoted successfully due Leader Reelection.'.format(
                    candidate.deviceID, candidate.deviceIP))
            else:
                LOG.error('Error on Backup deletion {}[{}] in Leader Reelection.'.format(
                    candidate.deviceID, candidate.deviceIP))
    # 4. Demote the current leader (self) via its own role-change endpoint.
    r = requests.get('{}agent'.format(URLS.build_url_address(
        URLS.URL_POLICIES_ROLECHANGE,
        portaddr=('127.0.0.1', CPARAMS.POLICIES_PORT))))
    if r.status_code == 200:
        LOG.info('Leader (self) demoted successfully')
        return True
    LOG.warning('Leader not demoted or confirmation not received')
    return False
def debug():
    """Debug helper: start LDiscovery (beacon or scan, per LEADER_FLAG) and Area Resilience via the local REST API."""
    sleep(10)  # Give some time to the webservice
    LOG.info('Starting LDiscovery...')
    local_addr = ('127.0.0.1', CPARAMS.POLICIES_PORT)
    endpoint = 'beacon/start' if CPARAMS.LEADER_FLAG else 'scan/start'
    r = requests.get(URLS.build_url_address(
        '{}{}'.format(URLS.URL_LDISCOVERY_CONTROL, endpoint), portaddr=local_addr))
    LOG.info('LDiscovery started with status_code = {}'.format(r.status_code))
    LOG.info('Starting Area Resilience...')
    r = requests.get(URLS.build_url_address(URLS.URL_POLICIES, portaddr=local_addr))
    LOG.debug('Area Resilience request result: {}'.format(r.json()))
    LOG.debug('Stopping thread activity.')
    return
def debug():
    # Debug helper: register the device with the Identification service
    # (retrying up to REGISTRATION_MAX_RETRY times), then kick off the agent flow.
    sleep(10)
    LOG.info('Device registration procedure...')
    attempt = 0
    successful = False
    while attempt < CPARAMS.REGISTRATION_MAX_RETRY and not successful:
        try:
            r = requests.post(
                URLS.build_url_address(URLS.URL_IDENTIFICATION_START,
                                       portaddr=('identification', '46060')))
            rjson = r.json()
            LOG.debug('Identification request result: {}'.format(rjson))
            if rjson['status'] == '412' and CPARAMS.CLOUD_FLAG:
                # We need to wait until user and password is registered
                LOG.warning('Cloud user not registered yet... Retry in 10s.')
            elif rjson['status'] in ('200', '201'):
                LOG.info('Successful registration of the device.')
                successful = True
            elif CPARAMS.DEBUG_FLAG:
                # In debug mode any other status code is tolerated.
                LOG.warning('Status code received different from 200 or 201. Debug mode skips this failure.')
                successful = True
            else:
                LOG.warning('Error on registration trigger. Retry in 10s...')
                successful = False
        except ValueError:
            # r.json() failed: the response body was not JSON.
            LOG.debug('ValueError raised on Identification: {}'.format(r.text))
        except:
            # Connection-level failure (service not up yet, etc.).
            LOG.warning('Early start of Identification not successful.')
        finally:
            if not successful:
                sleep(10)
            attempt += 1
    if not CPARAMS.DEBUG_FLAG and not successful:
        # Outside debug mode registration is mandatory.
        LOG.critical('Critical Error: Registration of the device not successful. Stopping module')
        exit(4)
    sleep(5)
    LOG.info('Starting Agent Flow...')
    r = requests.get(
        URLS.build_url_address(URLS.URL_START_FLOW,
                               portaddr=('127.0.0.1', CPARAMS.POLICIES_PORT)))
    # r = requests.get(URLS.build_url_address(URLS.URL_POLICIES, portaddr=('127.0.0.1', CPARAMS.POLICIES_PORT)))
    LOG.debug('Agent Flow request result: {}'.format(r.json()))
    LOG.debug('Stopping thread activity.')
    return
def main():
    """Run the Flask development server exposing the Policies API."""
    port = CPARAMS.POLICIES_PORT
    LOG.info('API documentation page at: http://{}:{}/'.format('localhost', port))
    app.run(debug=False, host='0.0.0.0', port=port)
def __trigger_startDiscoveryWatchLeader(self):
    """Trigger Discovery's watch-leader endpoint and log the JSON reply."""
    response = requests.get(self.URL_DISCOVERY_WATCH_LEADER)
    LOG.info(self.TAG + 'Discovery: {}'.format(response.json()))
def get(self, role):
    global arearesilience
    """Promotion/Demotion of the agent role."""
    # NOTE(review): the string above follows the `global` statement, so it is
    # not a real docstring; kept in place to leave the code untouched.
    # Snapshot the current role before acting on the request.
    imLeader = arearesilience.imLeader()
    imBackup = arearesilience.imBackup()
    if role.lower() == 'leader':
        # Do you want to be a leader?
        if imLeader:
            # Leader -> Leader is a no-op (rejected).
            # If a leader is promoted to leader, it becomes a super-leader?
            LOG.debug('Role change: Leader -> Leader')
            return {'imLeader': imLeader, 'imBackup': imBackup}, 403
        elif imBackup:
            # Backup -> Leader: the only allowed promotion path.
            # Hi, I'm backup-kun - It's my time to shine!!
            LOG.debug('Role change: Backup -> Leader')
            ret = agentstart.switch(imLeader=True)
            if ret:
                LOG.info('Successful promotion to Leader')
            else:
                LOG.warning('Unsuccessful promotion from Backup to Leader')
            # NOTE(review): 200 is returned even when switch() failed — confirm intended.
            return {'imLeader': True, 'imBackup': False}, 200
        else:
            # Nor leader, nor Backup, just a normal agent
            # For reelection, first you must be a backup!
            LOG.debug('Role change: Agent -> Leader')
            return {'imLeader': imLeader, 'imBackup': imBackup}, 403
    elif role.lower() == 'backup':
        # Always have a B plan
        if imLeader:
            # Why in the hell a Leader'll become a backup?
            LOG.debug('Role change: Leader -> Backup')
            return {'imLeader': imLeader, 'imBackup': imBackup}, 403
        elif imBackup:
            # Emm... no pls.
            LOG.debug('Role change: Backup -> Backup')
            return {'imLeader': imLeader, 'imBackup': imBackup}, 403
        else:
            # Agent -> Backup: promote via AreaResilience.
            # Can you watch my shoulder?
            LOG.debug('Role change: Agent -> Backup')
            # The requester (leader) address, honoring reverse-proxy header.
            leaderIP = request.environ.get('HTTP_X_REAL_IP', request.remote_addr)
            LOG.debug('Leader at {} is selecting me as Backup'.format(leaderIP))
            ret = arearesilience.promotedToBackup(
                leaderIP=agentstart.leaderIP)  # TODO: get leaderIP from CIMI
            if ret:
                LOG.info('Successful promotion to Backup')
                return {'imLeader': imLeader, 'imBackup': True}, 200
            else:
                LOG.warning('Unsuccessful promotion from Agent to Backup')
                return {
                    'imLeader': arearesilience.imLeader(),
                    'imBackup': arearesilience.imBackup()
                }, 403
    elif role.lower() == 'agent':
        # Bigger will be the fall....
        if imLeader:
            # Leader demotion: stop AR, switch AgentStart to agent mode and
            # restart a fresh AreaResilience instance.
            # You are shuch an incompetent, you're FIRED!
            LOG.debug('Role change: Leader -> Agent')
            arearesilience.stop()
            agentstart.switch(imLeader=False)
            CPARAMS.LEADER_FLAG = False
            arearesilience = AreaResilience(cimi)
            arearesilience.start(agentstart.deviceID)
            return {'imLeader': False, 'imBackup': False}, 200
        elif imBackup:
            # Backup demotion: restart AreaResilience as plain agent.
            # Maybe we are gona call you latter.... or not
            LOG.debug('Role change: Backup -> Agent')
            arearesilience.stop()
            arearesilience = AreaResilience(cimi)
            arearesilience.start(agentstart.deviceID)
            return {'imLeader': False, 'imBackup': False}, 200
        else:
            # You're so tiny that I don't even care.
            LOG.debug('Role change: Agent -> Agent')
            return {'imLeader': False, 'imBackup': False}, 403
    else:
        # Unknown role string.
        # keikaku doori... Weird syntax maybe?
        return {'imLeader': imLeader, 'imBackup': imBackup}, 404
def get(self):
    """Policies Healthcheck"""
    # Per-component health flags; 'health' and 'status' are placeholders here
    # and are computed below from the aggregates.
    payload = {
        'health': False,  # Needs change
        'startup': agentstart.isCompleted,
        'startup_time': CPARAMS.STARTUP_TIME_HEALTH,
        'status': 'BLACK',  # Needs change
        'API': True,
        # *_failed tri-state: None means "not run yet" and counts as unhealthy.
        'discovery': not agentstart.discovery_failed if agentstart.discovery_failed is not None else False,
        'identification': not agentstart.identification_failed if agentstart.identification_failed is not None else False,
        'cau-client': not agentstart.cauclient_failed if agentstart.cauclient_failed is not None else False,
        'res-cat': not agentstart.categorization_failed if agentstart.categorization_failed is not None else False,
        'area-resilience': not agentstart.policies_failed if agentstart.policies_failed is not None else False,
        'vpn-client': True if agentstart.vpnIP is not None and agentstart.vpnIP != '' else False,
        'deviceIP': True if agentstart.deviceIP is not None and agentstart.deviceIP != '' else False,
        # Cloud agents do not need a leader/cloud IP.
        'leaderIP': True if (agentstart.leaderIP is not None and agentstart.leaderIP != '') or CPARAMS.CLOUD_FLAG else False,
        'cloudIP': True if (agentstart.cloudIP is not None and agentstart.cloudIP != '') or CPARAMS.CLOUD_FLAG else False,
        'deviceID': True if agentstart.deviceID is not None and agentstart.deviceID != '' else False,
        # Only leaders need an elected backup.
        'backupElected': True if arearesilience.getAmountActiveBackups() > 0 or not agentstart.imLeader or agentstart.imCloud else False,
        'leaderfound': True if (agentstart.bssid is not None and agentstart.bssid != '') or agentstart.imLeader or agentstart.imCloud else False,
        'JOIN-MYIP': True if (agentstart.discovery_joined is not None and agentstart.discovery_joined) or agentstart.imCloud or agentstart.imLeader else False,
        'wifi-iface': True if CPARAMS.WIFI_DEV_FLAG != '' or CPARAMS.CLOUD_FLAG else False,
        'agent-resource': True if agentstart._cimi_agent_resource_id is not None and agentstart._cimi_agent_resource_id != '' else False
    }
    # Aggregate flags used by the status evaluation below.
    components = payload['identification'] and payload['discovery'] and payload['cau-client'] and payload['res-cat'] and payload['area-resilience'] and payload['vpn-client']
    components_no_discovery = payload['identification'] and payload['cau-client'] and payload['res-cat'] and payload['area-resilience'] and payload['vpn-client']
    ips = payload['deviceIP'] and payload['leaderIP'] and payload['cloudIP']
    discovery_fail_ip_ok = not payload['discovery'] and payload['deviceIP'] and payload['leaderIP']
    discovery_ok_leader_notfound_vpn_ok = payload['discovery'] and not payload['leaderfound'] and payload['vpn-client'] and payload['deviceIP'] and payload['leaderIP']
    # GREEN STATUS EVALUATION
    # RED/ORANGE: essential components or IPs missing (ORANGE during grace period).
    if not components_no_discovery or not ips or not payload['deviceID'] or not payload['agent-resource']:
        if time() - startup_time < payload['startup_time']:
            payload['status'] = 'ORANGE'
        else:
            payload['status'] = 'RED'
        payload['health'] = False
    # YELLOW: degraded but functional (ORANGE+unhealthy during grace period).
    elif discovery_fail_ip_ok or discovery_ok_leader_notfound_vpn_ok or not payload['backupElected'] or not payload['startup']:
        if time() - startup_time < payload['startup_time']:
            payload['status'] = 'ORANGE'
            payload['health'] = False
        else:
            payload['status'] = 'YELLOW'
            payload['health'] = True
    # GREEN: everything checked out.
    elif components and ips and payload['backupElected'] and payload['deviceID'] and payload['startup']:
        payload['status'] = 'GREEN'
        payload['health'] = True
    else:
        payload['status'] = 'YELLOW'
        payload['health'] = True
    LOG.info('Policies Health={} Status={} Started={} Components={} IPs={} ID={} AgentRes={}'.format(
        payload['health'], payload['status'], payload['startup'],
        components, ips, payload['deviceID'], payload['agent-resource']))
    status_code = 200 if payload['health'] else 400
    return payload, status_code
__version__ = '2.0.13b'
__author__ = 'Universitat Politècnica de Catalunya'

# ### Global Variables ### #
arearesilience = AreaResilience()   # Area Resilience submodule (leader protection)
agentstart = AgentStart()           # Agent Start submodule (startup flow)
startup_time = time()               # Used by the healthcheck startup grace period

# ### main.py code ### #
# Set Logger
if CPARAMS.DEBUG_FLAG:
    LOG.setLevel(DEBUG)
else:
    LOG.setLevel(INFO)
# NOTE(review): __status__ is not defined in this chunk — presumably declared
# elsewhere in the file; confirm before refactoring.
LOG.info('Policies Module. Version {} Status {}'.format(
    __version__, __status__))

# Print env variables
LOG.debug('Environment Variables: {}'.format(CPARAMS.get_all()))

# Prepare Server
app = Flask(__name__)
app.url_map.strict_slashes = False
api = Api(app, version=__version__, title='Policies Module API',
          description='Resource Manager - Agent Controller')

# API namespaces: public policies operations and internal resource-manager ops.
pl = api.namespace('api/v2/resource-management/policies',
                   description='Policies Module Operations')
rm = api.namespace('rm', description='Resource Manager Operations')
from common.CIMI import CIMIcalls as CIMI __status__ = 'Test' __version__ = '1.0' DEBUG_FLAG = environ.get('DEBUG', default='False') == 'True' RECOMMENDER_PORT = int(environ.get('RECOMMENDER_PORT', default=46020)) # Set Logger if DEBUG_FLAG: LOG.setLevel(DEBUG) else: LOG.setLevel(INFO) LOG.info('Recommender Test Module. Version {} Status {}'.format(__version__,__status__)) app = Flask(__name__) app.url_map.strict_slashes = False @app.route('/mf2c/optimal', methods=['POST'], strict_slashes=False) def recommender_get_ips(): topology = CIMI.get_topology() agent_res, res_id = CIMI.getAgentResource() if res_id != '' and 'device_ip' in agent_res: self_device_ip = agent_res['device_ip'] else: self_device_ip = None response = [{'ipaddress': ip[1]} for ip in topology] if self_device_ip is not None:
def __backupSelection(self):
    """
    Leader-side loop that keeps enough alive backups elected.

    While connected:
      1- Count backups whose TTL has not expired.
      2- If below the configured minimum, walk the topology and elect new
         backups (sending each candidate an election message).
      3- Sleep and repeat.
    :return: None (runs until self._connected becomes False)
    """
    while self._connected:
        correct_backups = 0
        with self.backupDatabaseLock:
            # 1. Count backups still alive (TTL >= 0).
            for backup in self.backupDatabase:
                if backup.TTL >= 0:
                    correct_backups += 1
        if correct_backups >= self._lpp.get(self._lpp.BACKUP_MINIMUM, default=1):
            # Enough backups
            LOG.debug('{} correct backup detected in Leader. Everything is OK.'.format(correct_backups))
        else:
            # Not enough: elect new backups from the topology.
            if not self._connected:
                break
            LOG.warning('{} backup dettected are not enough. Electing new ones...'.format(correct_backups))
            topology = self.__getTopology()
            new_backups = []
            while self._connected and correct_backups < self._lpp.get(
                    self._lpp.BACKUP_MINIMUM, default=1) and len(topology) > 0:
                device = topology[0]
                topology.remove(device)
                found = False
                with self.backupDatabaseLock:
                    for backup in self.backupDatabase:
                        if backup.deviceID == device.get('deviceID'):
                            found = True
                            break
                if not found:
                    # Todo: Evaluate if selected device is capable
                    correct = self.__send_election_message(device.get('deviceIP'))
                    if correct:
                        new_backup = BackupEntry(device.get('deviceID'),
                                                 device.get('deviceIP'),
                                                 self._nextPriority)
                        with self.backupDatabaseLock:
                            self.backupDatabase.append(new_backup)
                        LOG.info('Backup {}[{}] added with priority {}'.format(
                            device.get('deviceID'), device.get('deviceIP'),
                            self._nextPriority))
                        correct_backups += 1
                        self._nextPriority += 1
                        # FIX: was `new_backups.append(new_backups)` — appended
                        # the list to itself instead of the new entry.
                        new_backups.append(new_backup)
            if correct_backups >= self._lpp.get(self._lpp.BACKUP_MINIMUM, default=1):
                # Now we have enough
                LOG.info('{} correct backups dettected in Leader. {} new backups added.'.format(
                    correct_backups, len(new_backups)))
            else:
                LOG.warning('{} backups dettected are not enough. Waiting for new election.'.format(correct_backups))
        # Sleep before the next check round.
        if self._connected:
            sleep(self._lpp.get(self._lpp.TIME_TO_WAIT_BACKUP_SELECTION))
    LOG.info('Leader stopped...')
def __becomeBackup(self): """ :return: """ # 1- Send the KeepAlive message to the leader. # 2- Receive the reply (with preference number). # If leader down, Backup becomes leader. # Else repeat. attempt = 0 counter = 0 payload = {'deviceID': self._deviceID} self._imBackup = True while self._connected and attempt < self._lpp.get( self._lpp.MAX_RETRY_ATTEMPTS): stopLoop = False while self._connected and not stopLoop: try: # 1. Requests to Leader Keepalive endpoint r = requests.post(URLS.build_url_address( URLS.URL_POLICIES_KEEPALIVE, portaddr=(self._leaderIP, CPARAMS.POLICIES_PORT)), json=payload, timeout=0.5) LOG.debug(self.TAG + 'Keepalive sent [#{}]'.format(counter)) # 2. Process Reply jreply = r.json() if r.status_code == 200: leaderID = jreply['deviceID'] # Todo: Use this priority = jreply['backupPriority'] # 3. Update Preference self._backupPriority = priority LOG.debug( self.TAG + 'Reply received, Leader still alive: LeaderID: {}'. format(leaderID)) attempt = 0 else: # Error? LOG.error('KeepAlive status_code = {}'.format( r.status_code)) if r.status_code == 403 and self.PRIORITY_ON_DEMOTION == jreply[ 'backupPriority']: LOG.warning( 'Backup has been removed from database or not authorized to send keepalive messages' ) elif r.status_code == 405 and self.PRIORITY_ON_FAILURE == jreply[ 'backupPriority']: LOG.warning( 'Sending message to a Device that is not a Leader!' ) stopLoop = True else: stopLoop = True if not stopLoop: # 4. Sleep sleep(self._lpp.get(self._lpp.TIME_KEEPALIVE)) counter += 1 except: # Connection broke, backup assumes that Leader is down. LOG.debug('Keepalive connection refused') stopLoop = True LOG.warning( 'Keepalive connection is broken... Retry Attempts: {}'.format( self._lpp.get(self._lpp.MAX_RETRY_ATTEMPTS) - (attempt + 1))) attempt += 1 if not self._connected: LOG.info('Backup stopped.') else: LOG.warning(self.TAG + '## LEADER IS DOWN! ##') self._leaderFailed = True return
def __cloud_flow(self):
    # Startup flow for a Cloud agent: skips Discovery/Area-Resilience triggers,
    # starts CAU-client and Categorization, obtains the VPN IP and registers
    # the agent resource in CIMI.
    LOG.info(self.TAG + 'Cloud flow started.')
    # 0. Cloud Agent is Leader by definition
    self.imLeader = self.imCloud
    # 1. Discovery (skipped in cloud mode)
    LOG.debug(self.TAG + 'Discovery trigger ignored in Cloud flow.')
    self.discovery_failed = False
    self.discovery_leader_failed = False
    self.detectedLeaderID = self.deviceID
    # 2. Start CAU-client, retrying up to MAX_CAUCLIENT_FAILURES times.
    if self._connected:
        self.cauclient_failed = True
        LOG.debug(self.TAG + 'Sending trigger to CAU client...')
        attempt = 0
        r = False
        while self._connected and not r and attempt < self.MAX_CAUCLIENT_FAILURES:
            try:
                r = self.__trigger_triggerCAUclient()
                self.cauclient_failed = not r
            except Exception:
                LOG.exception(self.TAG + 'CAUclient failed.')
                self.cauclient_failed = True
            finally:
                attempt += 1
                if not r:
                    sleep(self.WAIT_TIME_CAUCLIENT)
        LOG.info(self.TAG + 'CAU client Trigger Done in {} attempts and ok={}.'.format(attempt, r))
    else:
        return
    if not CPARAMS.DEBUG_FLAG and self.cauclient_failed:
        # CAU-client is mandatory outside debug mode.
        LOG.critical(self.TAG + 'CAU-Client failed, interrupting agent start.')
        return
    # 3. VPN get IP (poll the VPN client's file up to MAX_VPN_FAILURES times).
    attempt = 0
    while self._connected and self.vpnIP is None and attempt < self.MAX_VPN_FAILURES:
        vpn_ip = VPN.getIPfromFile()
        self.vpnIP = vpn_ip if vpn_ip != '' else None
        if self.vpnIP is None:
            LOG.debug(self.TAG + 'VPN IP cannot be obtained... Retry in {}s'.format(self.WAIT_TIME_VPN))
            sleep(self.WAIT_TIME_VPN)
        attempt += 1
    if self.vpnIP is None:
        LOG.warning(self.TAG + 'VPN IP cannot be obtained.')
        if not CPARAMS.DEBUG_FLAG:
            LOG.critical(self.TAG + 'Policies module cannot continue its activity without VPN IP')
            exit(4)
    else:
        LOG.info(self.TAG + 'VPN IP: [{}]'.format(self.vpnIP))
    # 4. Switch leader categorization (or start if not started)
    if self._connected and not self.categorization_started:
        self.categorization_failed = True
        LOG.debug(self.TAG + 'Sending start trigger to Categorization...')
        try:
            self.__trigger_startCategorization()
            self.categorization_failed = False
            self.categorization_started = True
        except Exception:
            LOG.exception(self.TAG + 'Categorization failed')
            self.categorization_failed = True
        LOG.info(self.TAG + 'Categorization Start Trigger Done.')
    elif not self._connected:
        return
    if not CPARAMS.DEBUG_FLAG and self.categorization_failed:
        LOG.critical(self.TAG + 'Categorization failed, interrupting agent start.')
        return
    # 5. Area Resilience (skipped in cloud mode)
    LOG.debug(self.TAG + 'Area Resilience trigger ignored in Cloud flow.')
    self.policies_failed = False
    # Print summary of the flow results.
    self.__print_summary()
    # Create (or update) the Agent Resource in CIMI.
    self.deviceIP = self.vpnIP
    self.leaderIP = None
    self._cimi_agent_resource = AgentResource(self.deviceID, self.deviceIP,
                                              True, True, self.imLeader)
    LOG.debug(self.TAG + 'CIMI Agent Resource payload: {}'.format(
        self._cimi_agent_resource.getCIMIdicc()))
    if self._cimi_agent_resource_id is None:
        # Create agent resource
        self._cimi_agent_resource_id = CIMI.createAgentResource(
            self._cimi_agent_resource.getCIMIdicc())
        if self._cimi_agent_resource_id == '':
            LOG.warning(self.TAG + 'Agent resource creation failed.')
            if not CPARAMS.DEBUG_FLAG:
                LOG.error('Stopping Policies module due to resource creation failure.')
                exit(4)
    else:
        # Agent resource already exists: update it in place.
        status = CIMI.modify_resource(
            self._cimi_agent_resource_id,
            self._cimi_agent_resource.getCIMIdicc())
    self.isCompleted = True
    return