def get_app():
    """Prepare the process at startup and return the application object.

    Starts a local scheduler when the uwsgi module cannot be imported,
    loads plugins, and then makes a best-effort attempt to clear old job
    locks and jobs with invalid states from the database. Failures in the
    cleanup steps are printed and otherwise ignored.

    Returns:
        The ``app`` attribute of the module-level ``app`` object.
    """
    from cnaas_nms.scheduler.scheduler import Scheduler
    from cnaas_nms.plugins.pluginmanager import PluginManagerHandler
    from cnaas_nms.db.session import sqla_session
    from cnaas_nms.db.joblock import Joblock
    from cnaas_nms.db.job import Job

    # If running inside uwsgi, a separate "mule" will run the scheduler
    try:
        import uwsgi  # noqa: F401 -- imported only to detect uwsgi
    except (ModuleNotFoundError, ImportError):
        local_scheduler = Scheduler()
        local_scheduler.start()
    else:
        print("Running inside uwsgi")

    plugin_handler = PluginManagerHandler()
    plugin_handler.load_plugins()

    # Each cleanup runs in its own session so one failure does not
    # prevent the other from being attempted.
    startup_cleanups = (
        (Joblock.clear_locks,
         "Unable to clear old locks from database at startup: {}"),
        (Job.clear_jobs,
         "Unable to clear jobs with invalid states: {}"),
    )
    for cleanup, failure_message in startup_cleanups:
        try:
            with sqla_session() as session:
                cleanup(session)
        except Exception as err:
            print(failure_message.format(str(err)))

    return app.app
def refresh_repo(repo_type: RepoType = RepoType.TEMPLATES,
                 scheduled_by: str = None) -> str:
    """Refresh the repository for repo_type

    A Job record is created for the refresh and the 'devices' lock is held
    while the refresh runs. The job is marked FINISHED or EXCEPTION
    depending on the outcome, and the lock is released in both cases.

    Args:
        repo_type: Which repository to refresh
        scheduled_by: Passed to Job.start_job as the scheduled_by value

    Returns:
        String describing what was updated.

    Raises:
        cnaas_nms.db.settings.SettingsSyntaxError
        cnaas_nms.db.joblock.JoblockError: If the devices lock could not
            be acquired
    """
    # Acquire lock for devices to make sure no one refreshes the repository
    # while another task is building configuration for devices using repo data
    with sqla_session() as session:
        job = Job()
        job.start_job(function_name="refresh_repo", scheduled_by=scheduled_by)
        session.add(job)
        session.flush()
        job_id = job.id

        logger.info(
            "Trying to acquire lock for devices to run refresh repo: {}".
            format(job_id))
        if not Joblock.acquire_lock(session, name='devices', job_id=job_id):
            raise JoblockError(
                "Unable to acquire lock for configuring devices")
        try:
            result = _refresh_repo_task(repo_type)
            job.finish_time = datetime.datetime.utcnow()
            job.status = JobStatus.FINISHED
            job.result = {"message": result, "repository": repo_type.name}
            return result
        except Exception as e:
            logger.exception(
                "Exception while scheduling job for refresh repo: {}".format(
                    str(e)))
            job.finish_time = datetime.datetime.utcnow()
            job.status = JobStatus.EXCEPTION
            job.result = {"error": str(e), "repository": repo_type.name}
            raise
        finally:
            # Single release point: runs after both the success and the
            # error path, even if the error bookkeeping above fails, so
            # the devices lock is never left behind by this function.
            try:
                logger.info(
                    "Releasing lock for devices from refresh repo job: {}".
                    format(job_id))
                Joblock.release_lock(session, job_id=job_id)
            except Exception:
                # Best effort: a failed release must not mask the result
                # or the original exception, but keep the traceback.
                logger.exception(
                    "Unable to release devices lock after refresh repo job")
def add_onetime_job(self, func: Union[str, FunctionType],
                    when: Optional[int] = None,
                    scheduled_by: Optional[str] = None,
                    **kwargs) -> int:
    """Create a Job record and schedule func to run once.

    Depending on self.use_mule the job is either sent as a JSON message
    to the uwsgi mule or added to the local scheduler.

    Args:
        func: The function to call, as a callable or a string reference
        when: Optional number of seconds to wait before starting job
        scheduled_by: Forwarded to the called function as the
            scheduled_by keyword argument
        **kwargs: Arguments to pass through to called function

    Returns:
        int: job_id
    """
    # Default is to run without a trigger; a positive integer delay
    # switches to a 'date' trigger at now + delay.
    trigger = None
    run_date = None
    if when and isinstance(when, int):
        trigger = 'date'
        run_date = (datetime.datetime.utcnow()
                    + datetime.timedelta(seconds=when))

    with sqla_session() as session:
        new_job = Job()
        if run_date:
            new_job.scheduled_time = run_date
        session.add(new_job)
        # Flush so the database assigns the id used as scheduler job id
        session.flush()
        job_id = new_job.id

    kwargs.update(job_id=job_id, scheduled_by=scheduled_by)

    if not self.use_mule:
        self._scheduler.add_job(func, trigger=trigger, kwargs=kwargs,
                                id=str(job_id), run_date=run_date)
        return job_id

    try:
        import uwsgi
    except Exception as err:
        logger.exception("use_mule is set but not running in uwsgi")
        raise err

    if isinstance(func, FunctionType):
        func_ref = str(func.__qualname__)
    else:
        func_ref = str(func)
    # The mule receives everything it needs as one JSON document
    mule_args = dict(kwargs, func=func_ref, trigger=trigger, when=when,
                     id=str(job_id))
    uwsgi.mule_msg(json.dumps(mule_args))
    return job_id
def post(self, hostname: str):
    """Restore configuration to previous version

    Looks up the configuration stored by an earlier job for this hostname
    and schedules an apply_config job with that configuration. The request
    body must be a JSON object containing an integer job_id. dry_run
    defaults to True and is only disabled when the body contains
    "dry_run": false.

    Args:
        hostname: Hostname of the device to restore

    Returns:
        Tuple of (result, HTTP status code). On success the result also
        carries the job_id of the newly scheduled job.
    """
    json_data = request.get_json()
    if not Device.valid_hostname(hostname):
        return empty_result(status='error',
                            data="Invalid hostname specified"), 400

    # get_json() can yield None or a non-object value (list, string, ...);
    # none of those can carry a job_id, so answer 400 instead of failing
    # with a TypeError on the membership test.
    if not isinstance(json_data, dict) or 'job_id' not in json_data:
        return empty_result('error', "job_id must be specified"), 400
    try:
        job_id = int(json_data['job_id'])
    except (TypeError, ValueError, OverflowError):
        return empty_result('error', "job_id must be an integer"), 400

    config = None
    with sqla_session() as session:
        try:
            prev_config_result = Job.get_previous_config(session, hostname,
                                                         job_id=job_id)
            failed = prev_config_result['failed']
            if not failed and 'config' in prev_config_result:
                config = prev_config_result['config']
        except JobNotFoundError as e:
            return empty_result('error', str(e)), 404
        except InvalidJobError as e:
            return empty_result('error', str(e)), 500
        except Exception as e:
            return empty_result('error',
                                "Unhandled exception: {}".format(e)), 500

    if failed:
        return empty_result(
            'error', "The specified job_id has a failed status"), 400
    if not config:
        return empty_result('error', "No config found in this job"), 500

    apply_kwargs = {
        'hostname': hostname,
        # Only an explicit boolean false turns dry_run off; any other
        # value (missing, null, 0, "false", ...) keeps the safe default.
        'dry_run': json_data.get('dry_run') is not False,
        'config': config,
    }

    scheduler = Scheduler()
    job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:apply_config',
        when=1,
        scheduled_by=get_jwt_identity(),
        kwargs=apply_kwargs,
    )

    res = empty_result(data=f"Scheduled job to restore {hostname}")
    res['job_id'] = job_id
    return res, 200
def arista_pre_flight_check(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task that checks free flash space before a firmware upgrade
    and removes old firmware images when space is low.

    Args:
        task: NorNir task
        job_id: Job whose abort status is checked before doing any work

    Returns:
        String, describing the result
    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Pre-flight aborted"

    # Deletes all but the newest .swi image, skipping whatever
    # boot-config currently points at.
    flash_cleanup = 'bash timeout 30 ls -t /mnt/flash/*.swi | tail -n +2 | grep -v `cut -d"/" -f2 /mnt/flash/boot-config` | xargs rm -f'
    # Prints the fourth column of df for /mnt/flash (header + one value)
    flash_diskspace = "bash timeout 5 df /mnt/flash | awk '{print $4}'"

    # Get amount of free disk space
    space_reply = task.run(napalm_cli, commands=[flash_diskspace])
    single_answer = (isinstance(space_reply, MultiResult)
                     and len(space_reply.result.keys()) == 1)
    if not single_answer:
        raise Exception('Could not check free space')

    # Second line of the output is the value; the first is the df header.
    # NOTE(review): df may report blocks rather than bytes -- confirm the
    # unit the threshold below is meant to be in.
    (df_output,) = space_reply.result.values()
    free_bytes = df_output.split('\n')[1]

    # Remove old firmware images if needed
    if int(free_bytes) >= 2500000:
        logger.info('Enough free space ({}b), no cleanup'.format(free_bytes))
    else:
        logger.info('Cleaning up old firmware images on {}'.format(
            task.host.name))
        task.run(napalm_cli, commands=[flash_cleanup])

    return "Pre-flight check done."
def arista_post_flight_check(task, post_waittime: int,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task that records the OS version of a device after it has
    been upgraded.

    After sleeping for post_waittime seconds the device facts are fetched
    and the reported OS version is stored on the Device row. If the
    version is unchanged the upgrade is treated as failed; otherwise the
    device is marked unsynchronized and its confhash is cleared.

    Args:
        task: NorNir task
        post_waittime: Time to wait before trying to gather facts
        job_id: Job whose abort status is checked after the wait

    Returns:
        String, describing the result
    """
    set_thread_data(job_id)
    logger = get_logger()
    time.sleep(int(post_waittime))
    logger.info(
        'Post-flight check wait ({}s) complete, starting check for {}'.format(
            post_waittime, task.host.name))

    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Post-flight aborted"

    try:
        facts_reply = task.run(napalm_get, getters=["facts"])
        new_version = facts_reply[0].result['facts']['os_version']

        with sqla_session() as session:
            device_query = session.query(Device).filter(
                Device.hostname == task.host.name)
            dev: Device = device_query.one()
            old_version = dev.os_version
            dev.os_version = new_version
            if old_version == new_version:
                logger.error(
                    "OS version did not change, activation failed on {}".
                    format(task.host.name))
                raise Exception("OS version did not change, activation failed")
            # New firmware: previously stored config state is no longer
            # considered valid for this device.
            dev.confhash = None
            dev.synchronized = False
    except Exception as err:
        logger.exception("Could not update OS version on device {}: {}".format(
            task.host.name, str(err)))
        return 'Post-flight failed, could not update OS version: {}'.format(
            str(err))

    return "Post-flight, OS version updated from {} to {}.".format(
        old_version, new_version)
def get(self, hostname: str):
    """Return a previously stored configuration for hostname.

    At most one selector from the query string is used, checked in this
    order: job_id (integer), previous (integer), before (ISO format date
    time string). The selector is forwarded to Job.get_previous_config.

    Args:
        hostname: Hostname of the device to look up

    Returns:
        The result on success, or a tuple of (error result, HTTP status
        code) on failure.
    """
    if not Device.valid_hostname(hostname):
        return empty_result(status='error',
                            data="Invalid hostname specified"), 400

    query_args = request.args
    # (query parameter, converter, message when conversion fails)
    selectors = (
        ('job_id', int, "job_id must be an integer"),
        ('previous', int, "previous must be an integer"),
        ('before', lambda raw: datetime.datetime.fromisoformat(raw),
         "before must be a valid ISO format date time string"),
    )
    lookup_kwargs = {}
    for param, convert, error_message in selectors:
        if param not in query_args:
            continue
        try:
            lookup_kwargs[param] = convert(query_args[param])
        except Exception:
            return empty_result('error', error_message), 400
        # Only the first selector present is honoured
        break

    result = empty_result()
    with sqla_session() as session:
        try:
            result['data'] = Job.get_previous_config(
                session, hostname, **lookup_kwargs)
        except JobNotFoundError as err:
            return empty_result('error', str(err)), 404
        except InvalidJobError as err:
            return empty_result('error', str(err)), 500
        except Exception as err:
            return empty_result('error',
                                "Unhandled exception: {}".format(err)), 500

    return result
def arista_device_reboot(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task that saves the configuration and reloads a single device.

    Args:
        task: NorNir task.
        job_id: Job whose abort status is checked before rebooting

    Returns:
        String, describing the result
    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Reboot aborted"

    # Commands are sent in order; the last one does not wait for a
    # prompt ending in '#', it accepts any output.
    reboot_steps = (
        {'command_string': 'enable', 'expect_string': '.*#'},
        {'command_string': 'write', 'expect_string': '.*#'},
        {'command_string': 'reload force', 'max_loops': 2,
         'expect_string': '.*'},
    )
    try:
        for step_kwargs in reboot_steps:
            task.run(netmiko_send_command, **step_kwargs)
    except Exception as err:
        logger.exception('Failed to reboot switch {}: {}'.format(
            task.host.name, str(err)))
        raise err

    return "Device reboot done."
def add_onetime_job(self, func: Union[str, FunctionType],
                    when: Optional[int] = None,
                    scheduled_by: Optional[str] = None,
                    **kwargs) -> int:
    """Schedule a job to run at a later time on the mule worker or
    local scheduler depending on setup.

    Some extra checks against kwargs are performed here. If kwarg with
    name 'dry_run' is included, (dry_run) is appended to function name.
    If kwarg job_comment or job_ticket_ref are included, those fields in
    the job will be populated. These values are read from the nested
    dict passed as the keyword argument named 'kwargs'; job_comment and
    job_ticket_ref are removed from that dict before it is stored as the
    job's start arguments.

    Args:
        func: The function to call
        when: Optional number of seconds to wait before starting job
        scheduled_by: Username that scheduled the job, stored as
            'unknown' when None
        **kwargs: Arguments to pass through to called function

    Returns:
        int: job_id

    Raises:
        TypeError: If kwargs can not be serialized to JSON
    """
    if when and isinstance(when, int):
        trigger = 'date'
        run_date = datetime.datetime.utcnow() + datetime.timedelta(
            seconds=when)
    else:
        trigger = None
        run_date = None

    if isinstance(func, FunctionType):
        func_qualname = str(func.__qualname__)
    else:
        func_qualname = str(func)
    # 'package.module:function' -> 'function'
    func_name = func_qualname.split(':')[-1]

    try:
        json.dumps(kwargs)
    except TypeError as e:
        raise TypeError(
            "Job args must be JSON serializable: {}".format(e)) from e

    # Arguments for the called function; may be absent entirely
    job_kwargs = kwargs.get('kwargs')

    # Append (dry_run) to function name if set, so we can distinguish
    # dry_run jobs
    if isinstance(job_kwargs, dict) and job_kwargs.get('dry_run'):
        func_name += " (dry_run)"

    with sqla_session() as session:
        job = Job()
        if run_date:
            job.scheduled_time = run_date
        job.function_name = func_name
        if scheduled_by is None:
            scheduled_by = 'unknown'
        job.scheduled_by = scheduled_by
        if job_kwargs is not None:
            job_comment = job_kwargs.pop('job_comment', None)
            if job_comment and isinstance(job_comment, str):
                job.comment = job_comment[:255]
            job_ticket_ref = job_kwargs.pop('job_ticket_ref', None)
            # Validate the ticket reference itself, not the comment, so a
            # ticket ref without a comment is still stored
            if job_ticket_ref and isinstance(job_ticket_ref, str):
                job.ticket_ref = job_ticket_ref[:32]
            job.start_arguments = job_kwargs
        session.add(job)
        # Flush so the database assigns the id used as scheduler job id
        session.flush()
        job_id = job.id

    kwargs['job_id'] = job_id
    kwargs['scheduled_by'] = scheduled_by
    if self.use_mule:
        try:
            import uwsgi
        except Exception as e:
            logger.exception("use_mule is set but not running in uwsgi")
            raise e
        args = dict(kwargs)
        args['func'] = func_qualname
        args['trigger'] = trigger
        args['when'] = when
        args['id'] = str(job_id)
        uwsgi.mule_msg(json.dumps(args))
        return job_id
    else:
        self.add_local_job(func, trigger=trigger, kwargs=kwargs,
                           id=str(job_id), run_date=run_date,
                           name=func_qualname)
        return job_id
def arista_firmware_activate(task, filename: str,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task that points the boot config at a new firmware image and
    verifies that the change took effect.

    Args:
        task: NorNir task
        filename: Name of the new firmware image
        job_id: Job whose abort status is checked before doing any work

    Returns:
        String, describing the result

    Raises:
        FirmwareAlreadyActiveException: If boot-config already refers to
            filename
        Exception: If activation fails for any other reason
    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Firmware activate aborted"

    set_boot_cmd = 'boot system flash:{}'.format(filename)
    # Used both before the change (already active?) and after (verify)
    show_boot_cmd = 'show boot-config | grep -o "\\w*{}\\w*"'.format(
        filename)

    try:
        task.run(netmiko_send_command, command_string='enable',
                 expect_string='.*#')

        current = task.run(netmiko_send_command,
                           command_string=show_boot_cmd)
        if current.result == filename:
            raise FirmwareAlreadyActiveException(
                'Firmware already activated in boot-config on {}'.format(
                    task.host.name))

        task.run(netmiko_send_command, command_string='conf t',
                 expect_string='.*config.*#')
        task.run(netmiko_send_command, command_string=set_boot_cmd)
        task.run(netmiko_send_command, command_string='end',
                 expect_string='.*#')

        verify = task.run(netmiko_send_command,
                          command_string=show_boot_cmd)
        if not isinstance(verify, MultiResult):
            raise Exception('Could not check boot-config on {}'.format(
                task.host.name))
        if verify.result != filename:
            raise Exception('Firmware not activated properly on {}'.format(
                task.host.name))
    except FirmwareAlreadyActiveException:
        # Let callers distinguish "nothing to do" from a real failure
        raise
    except Exception as err:
        logger.exception('Failed to activate firmware on {}: {}'.format(
            task.host.name, str(err)))
        raise Exception('Failed to activate firmware')

    return "Firmware activate done."
def arista_firmware_download(task, filename: str, httpd_url: str,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task to download firmware image from the HTTP server.

    The copy command includes 'vrf MGMT' for every device type except
    DeviceType.ACCESS.

    Args:
        task: NorNir task
        filename: Name of the file to download
        httpd_url: Base URL to the HTTP server
        job_id: Job whose abort status is checked before doing any work

    Returns:
        String, describing the result

    Raises:
        Exception: If the device is not found in the database, the copy
            command fails or its output does not report success
    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Firmware download aborted"

    url = httpd_url + '/' + filename
    # Make sure netmiko doesn't use fast_cli because it will change delay_factor
    # that is set in task.run below and cause early timeouts
    net_connect = task.host.get_connection("netmiko", task.nornir.config)
    net_connect.fast_cli = False

    try:
        with sqla_session() as session:
            dev: Device = session.query(Device).\
                filter(Device.hostname == task.host.name).one_or_none()
            # one_or_none() yields None for an unknown hostname; fail with
            # a clear message instead of an AttributeError on None
            if dev is None:
                raise Exception('Device {} not found in database'.format(
                    task.host.name))
            device_type = dev.device_type

        if device_type == DeviceType.ACCESS:
            firmware_download_cmd = 'copy {} flash:'.format(url)
        else:
            firmware_download_cmd = 'copy {} vrf MGMT flash:'.format(url)

        # NOTE(review): replace() collapses every '//' including the one
        # after the URL scheme (http:// -> http:/); presumably this is the
        # form the device CLI expects -- confirm.
        res = task.run(netmiko_send_command,
                       command_string=firmware_download_cmd.replace("//", "/"),
                       enable=True,
                       delay_factor=30,
                       max_loops=200)

        if 'Copy completed successfully' in res.result:
            return "Firmware download done."

        logger.debug("Firmware download failed on {} ('{}'): {}".format(
            task.host.name, firmware_download_cmd, res.result))
        # Only the lines starting with 'get:' are included in the error
        raise Exception(
            "Copy command did not complete successfully: {}".format(
                ', '.join(line for line in res.result.splitlines()
                          if line.startswith('get:'))))
    except NornirSubTaskError as e:
        subtask_result = e.result[0]
        logger.error('{} failed to download firmware: {}'.format(
            task.host.name, subtask_result))
        logger.debug('{} download subtask result: {}'.format(
            task.host.name, subtask_result.result))
        raise Exception(
            'Failed to download firmware: {}'.format(subtask_result)) from e
    except Exception as e:
        logger.error('{} failed to download firmware: {}'.format(
            task.host.name, e))
        raise Exception('Failed to download firmware: {}'.format(e)) from e