def generate_only(hostname: str) -> (str, dict):
    """
    Generate configuration for a device and return it as a text string.

    Runs push_sync_device with generate_only=True on the inventory host
    selected by hostname. Errors during generation are logged and reported
    through the returned string instead of being raised.

    Args:
        hostname: Hostname of device generate config for

    Returns:
        (string with config, dict with available template variables).
        On failure the first element is the result of the second task result
        if one was produced, otherwise the text of the exception.

    Raises:
        ValueError: If hostname does not select exactly one inventory host.
    """
    logger = get_logger()
    nr = cnaas_init()
    nr_filtered, _, _ = inventory_selector(nr, hostname=hostname)
    template_vars = {}
    if len(nr_filtered.inventory.hosts) != 1:
        raise ValueError("Invalid hostname: {}".format(hostname))

    # Bound before the try block so the except handler can tell "run() never
    # returned" apart from "run() returned a failed result".
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_sync_device, generate_only=True)
        if nrresult[hostname][0].failed:
            raise Exception("Could not generate config for device {}: {}".format(
                hostname, nrresult[hostname][0].result
            ))
        if "template_vars" in nrresult[hostname][1].host:
            template_vars = nrresult[hostname][1].host["template_vars"]
        if nrresult.failed:
            print_result(nrresult)
            raise Exception("Failed to generate config for {}".format(hostname))
        return nrresult[hostname][1].result, template_vars
    except Exception as e:
        logger.exception("Exception while generating config: {}".format(str(e)))
        host_results = None
        if nrresult is not None and hostname in nrresult:
            host_results = nrresult[hostname]
        if host_results is not None and len(host_results) >= 2:
            return host_results[1].result, template_vars
        return str(e), template_vars
def sync_check_hash(task, force=False, job_id=None):
    """
    Start the task which will compare device configuration hashes.

    Returns without checking when force is True or when no hash is stored
    for the device.

    Args:
        task: Nornir task
        force: Ignore device hash
        job_id: Job ID, passed to set_thread_data

    Raises:
        Exception: If the running configuration could not be read, or if its
            SHA-256 hash differs from the stored hash.
    """
    set_thread_data(job_id)
    if force is True:
        return
    with sqla_session() as session:
        stored_hash = Device.get_config_hash(session, task.host.name)
    if stored_hash is None:
        return

    task.host.open_connection("napalm", configuration=task.nornir.config)
    try:
        res = task.run(task=napalm_get, getters=["config"])
    finally:
        # Close the connection even when the getter raises.
        task.host.close_connection("napalm")

    running_config = dict(res.result)['config']['running']
    # Check before encoding: .encode() never returns None.
    if running_config is None:
        raise Exception('Failed to get running configuration')
    running_hash = sha256(running_config.encode()).hexdigest()
    if stored_hash != running_hash:
        raise Exception('Device {} configuration is altered outside of CNaaS!'.format(task.host.name))
def renew_cert_task(task, job_id: str) -> str:
    """
    Nornir task to generate a new device certificate and copy it to the device.

    Args:
        task: Nornir task
        job_id: Job ID, passed to set_thread_data and to the copy subtask

    Returns:
        String, describing the result

    Raises:
        ValueError: If the host platform is not "eos".
        Exception: If the device is missing from the database, has no
            management_ip, or certificate generation/copy fails.
    """
    set_thread_data(job_id)
    logger = get_logger()

    with sqla_session() as session:
        dev: Device = session.query(Device). \
            filter(Device.hostname == task.host.name).one_or_none()
        # one_or_none() returns None for an unknown hostname.
        if dev is None:
            raise Exception("Device {} not found in database".format(
                task.host.name))
        ip = dev.management_ip
        if not ip:
            raise Exception("Device {} has no management_ip".format(
                task.host.name))

    try:
        generate_device_cert(task.host.name, ipv4_address=ip)
    except Exception as e:
        raise Exception(
            "Could not generate certificate for device {}: {}".format(
                task.host.name, e)) from e

    if task.host.platform == "eos":
        try:
            task.run(task=arista_copy_cert, job_id=job_id)
        except Exception as e:
            logger.exception('Exception while copying certificates: {}'.format(
                str(e)))
            raise
    else:
        raise ValueError("Unsupported platform: {}".format(task.host.platform))

    return "Certificate renew success for device {}".format(task.host.name)
def push_static_config(task, config: str, dry_run: bool = True,
                       job_id: Optional[str] = None,
                       scheduled_by: Optional[str] = None):
    """
    Nornir task that pushes a static configuration using napalm_configure
    with replace=True.

    Args:
        task: nornir task, sent by nornir when doing .run()
        config: static config to apply
        dry_run: Don't commit config to device, just do compare/diff
        job_id: job ID, passed to set_thread_data
        scheduled_by: username that triggered job (not read by this task)
    """
    set_thread_data(job_id)
    log = get_logger()
    target = task.host.name
    log.debug("Push static config to device: {}".format(target))

    configure_args = {
        "name": "Push static config",
        "replace": True,
        "configuration": config,
        "dry_run": dry_run,
    }
    task.run(task=napalm_configure, **configure_args)
def ztp_device_cert(task, job_id: str, new_hostname: str, management_ip: str) -> str:
    """
    Nornir task to generate a device certificate and install it on the device.

    Args:
        task: Nornir task
        job_id: Job ID, passed to set_thread_data and to the copy subtask
        new_hostname: Hostname to generate the certificate for
        management_ip: IPv4 address (as string) to include in the certificate

    Returns:
        String, describing the result. Platforms other than "eos" get a
        "not supported" message instead of an exception.

    Raises:
        Exception: If the address is invalid or certificate generation fails
            (original error chained as the cause), or if copying fails.
    """
    set_thread_data(job_id)
    logger = get_logger()

    try:
        generate_device_cert(new_hostname, ipv4_address=IPv4Address(management_ip))
    except Exception as e:
        raise Exception(
            "Could not generate certificate for device {}: {}".format(
                new_hostname, e)) from e

    if task.host.platform != "eos":
        return "Install device certificate not supported on platform: {}".format(
            task.host.platform)

    try:
        # TODO: subtaskerror?
        task.run(task=arista_copy_cert, job_id=job_id)
    except Exception as e:
        logger.exception('Exception while copying certificates: {}'.format(
            str(e)))
        raise
    return "Device certificate installed for {}".format(new_hostname)
def arista_pre_flight_check(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task to do some basic checks before attempting to upgrade a switch.

    Reads the free space on /mnt/flash and, when the reported value is below
    2500000, runs a cleanup command over *.swi files on the flash.

    Args:
        task: NorNir task
        job_id: Job ID, passed to set_thread_data and used for the abort check

    Returns:
        String, describing the result

    Raises:
        Exception: If the free space could not be read or parsed.
    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Pre-flight aborted"

    flash_diskspace = 'bash timeout 5 df /mnt/flash | awk \'{print $4}\''
    flash_cleanup = 'bash timeout 30 ls -t /mnt/flash/*.swi | tail -n +2 | grep -v `cut -d"/" -f2 /mnt/flash/boot-config` | xargs rm -f'

    # Get amount of free disk space
    res = task.run(napalm_cli, commands=[flash_diskspace])
    if not isinstance(res, MultiResult) or len(res.result.keys()) != 1:
        raise Exception('Could not check free space')

    # The first output line is the column header; the value is on the second.
    # NOTE(review): df presumably reports 1K blocks here, not bytes - confirm.
    output_lines = next(iter(res.result.values())).split('\n')
    try:
        free_bytes = int(output_lines[1])
    except (IndexError, ValueError) as e:
        # Truncated or non-numeric output (e.g. the command timed out).
        raise Exception('Could not check free space') from e

    # Remove old firmware images if needed
    if free_bytes < 2500000:
        logger.info('Cleaning up old firmware images on {}'.format(
            task.host.name))
        task.run(napalm_cli, commands=[flash_cleanup])
    else:
        logger.info('Enough free space ({}b), no cleanup'.format(free_bytes))

    return "Pre-flight check done."
def get_uplinks(session, hostname: str) -> Dict[str, str]:
    """Returns dict with mapping of interface -> neighbor hostname

    A DIST neighbor counts as an uplink whenever a local interface towards
    it is found. An ACCESS neighbor counts only if its interface facing this
    device is stored with config type ACCESS_DOWNLINK.

    Args:
        session: Database session
        hostname: Hostname of the device to find uplinks for
    """
    logger = get_logger()
    # TODO: check if uplinks are already saved in database?
    uplinks = {}

    dev = session.query(Device).filter(Device.hostname == hostname).one()
    neighbor_d: Device
    for neighbor_d in dev.get_neighbors(session):
        if neighbor_d.device_type == DeviceType.DIST:
            local_if = dev.get_neighbor_local_ifname(session, neighbor_d)
            # TODO: check that dist interface is configured as downlink
            if local_if:
                uplinks[local_if] = neighbor_d.hostname
        elif neighbor_d.device_type == DeviceType.ACCESS:
            # Filter on the column. Comparing the enum class with its own
            # member is a constant False and matched no rows.
            intfs = session.query(Interface).filter(Interface.device == neighbor_d). \
                filter(Interface.configtype == InterfaceConfigType.ACCESS_DOWNLINK).all()
            local_if = dev.get_neighbor_local_ifname(session, neighbor_d)
            remote_if = neighbor_d.get_neighbor_local_ifname(session, dev)

            # Same guard as the DIST branch: never store a missing local
            # interface as a dict key.
            if local_if and any(intf.name == remote_if for intf in intfs):
                uplinks[local_if] = neighbor_d.hostname

    logger.debug("Uplinks for device {} detected: {}".format(
        hostname, ', '.join([
            "{}: {}".format(ifname, neighbor_hostname)
            for ifname, neighbor_hostname in uplinks.items()
        ])))

    return uplinks
def push_base_management_access(task, device_variables, job_id):
    """
    Nornir task that renders the ACCESS entrypoint template and pushes the
    result to the device as a full replace.

    Args:
        task: Nornir task
        device_variables: Template variables; override values from settings
        job_id: Job ID, passed to set_thread_data

    Raises:
        RepoStructureException: If mapping.yml is missing for the platform.
        InitError: If the push fails but the device still answers a facts
            request afterwards.
    """
    set_thread_data(job_id)
    log = get_logger()
    log.debug("Push basetemplate for host: {}".format(task.host.name))

    with open('/etc/cnaas-nms/repository.yml', 'r') as repo_fh:
        templates_root = yaml.safe_load(repo_fh)['templates_local']

    mapfile = os.path.join(templates_root, task.host.platform, 'mapping.yml')
    if not os.path.isfile(mapfile):
        raise RepoStructureException(
            "File {} not found in template repo".format(mapfile))
    with open(mapfile, 'r') as map_fh:
        entrypoint = yaml.safe_load(map_fh)['ACCESS']['entrypoint']

    settings, _settings_origin = get_settings(task.host.name, DeviceType.ACCESS)

    # Add all environment variables starting with TEMPLATE_SECRET_ to
    # the list of configuration variables. The idea is to store secret
    # configuration outside of the templates repository.
    template_secrets = {
        env_name: env_value
        for env_name, env_value in os.environ.items()
        if env_name.startswith('TEMPLATE_SECRET_')
    }

    # Merge dicts, this will overwrite interface list from settings
    template_vars = {**settings, **device_variables, **template_secrets}

    template_dir = f"{templates_root}/{task.host.platform}"
    rendered = task.run(task=text.template_file,
                        name="Generate initial device config",
                        template=entrypoint,
                        path=template_dir,
                        **template_vars)

    # TODO: Handle template not found, variables not defined

    task.host["config"] = rendered.result
    # Use extra low timeout for this since we expect to lose connectivity after changing IP
    task.host.connection_options["napalm"] = ConnectionOptions(
        extras={"timeout": 30})

    try:
        task.run(task=networking.napalm_configure,
                 name="Push base management config",
                 replace=True,
                 configuration=task.host["config"],
                 dry_run=False)
    except Exception:
        # An error here is expected when the management IP changes. It is
        # only reported if the device is still reachable afterwards.
        task.run(task=networking.napalm_get, getters=["facts"])
        still_reachable = not task.results[-1].failed
        if still_reachable:
            raise InitError(
                "Device {} did not commit new base management config".format(
                    task.host.name))
def update_interfacedb_worker(
        session,
        dev: Device,
        replace: bool,
        delete: bool,
        mlag_peer_hostname: Optional[str] = None) -> List[dict]:
    """Perform actual work of updating database for update_interfacedb

    Args:
        session: Database session; committed before returning
        dev: Device whose physical interfaces are processed
        replace: Overwrite interfaces that already exist in the database
        delete: Delete interfaces that already exist in the database
        mlag_peer_hostname: Hostname of the MLAG peer, if any

    Returns:
        List of dicts (from as_dict()) for each interface added or updated
    """
    log = get_logger()
    updated = []

    all_ifnames = get_interfaces_names(dev.hostname)
    uplinks = get_uplinks(session, dev.hostname)
    mlag_ifs = (get_mlag_ifs(session, dev.hostname, mlag_peer_hostname)
                if mlag_peer_hostname else {})
    phy_interfaces = filter_interfaces(all_ifnames,
                                       platform=dev.platform,
                                       include='physical')

    for intf_name in phy_interfaces:
        intf = session.query(Interface).filter(Interface.device == dev). \
            filter(Interface.name == intf_name).one_or_none()
        in_database = bool(intf)

        if in_database:
            if delete:
                log.debug(
                    "Deleting interface {} on device {} from interface DB".format(
                        intf_name, dev.hostname))
                session.delete(intf)
                continue
            if not replace:
                continue
        else:
            intf = Interface()

        log.debug(
            "New/updated physical interface found on device {}: {}".format(
                dev.hostname, intf_name))

        if intf_name in uplinks:
            intf.configtype = InterfaceConfigType.ACCESS_UPLINK
            intf.data = {'neighbor': uplinks[intf_name]}
        elif intf_name in mlag_ifs:
            intf.configtype = InterfaceConfigType.MLAG_PEER
            intf.data = {'neighbor_id': mlag_ifs[intf_name]}
        else:
            intf.configtype = InterfaceConfigType.ACCESS_AUTO
        intf.name = intf_name
        intf.device = dev

        if not in_database:
            session.add(intf)
        updated.append(intf.as_dict())

    session.commit()
    return updated
def resolve_vlanid(vlan_name: str, vxlans: dict) -> Optional[int]:
    """Resolve a VLAN name to a VLAN ID using the vxlans settings dict.

    Args:
        vlan_name: VLAN name to look up. An int is returned as-is.
        vxlans: Dict whose values are dicts with 'vlan_name' and 'vlan_id'

    Returns:
        VLAN ID of the first entry whose 'vlan_name' matches, or None if
        nothing matches or vlan_name is neither int nor str.
    """
    # bool is a subclass of int. It was not treated as a VLAN ID before
    # (type(x) == int), so it is still rejected.
    if isinstance(vlan_name, int) and not isinstance(vlan_name, bool):
        return int(vlan_name)
    if not isinstance(vlan_name, str):
        return None
    for vxlan_data in vxlans.values():
        try:
            if vxlan_data['vlan_name'] == vlan_name:
                return int(vxlan_data['vlan_id'])
        except (KeyError, ValueError, TypeError) as e:
            # TypeError covers entries that are not dicts and vlan_id values
            # such as None. Log and continue with the next entry. The logger
            # is only needed on this path.
            get_logger().error("Could not resolve VLAN ID for VLAN name {}: {}".format(vlan_name, str(e)))
    return None
def arista_post_flight_check(task, post_waittime: int, job_id: Optional[str] = None) -> str:
    """
    NorNir task to update device facts after a switch has been upgraded.

    Sleeps for post_waittime seconds, fetches NAPALM facts and stores the
    reported OS version on the Device row. An unchanged OS version is treated
    as a failed activation. Failures are logged and reported through the
    returned string; they are not raised to the caller.

    Args:
        task: NorNir task
        post_waittime: Time to wait before trying to gather facts
        job_id: Job ID, passed to set_thread_data and used for the abort check

    Returns:
        String, describing the result
    """
    set_thread_data(job_id)
    logger = get_logger()
    time.sleep(int(post_waittime))
    logger.info(
        'Post-flight check wait ({}s) complete, starting check for {}'.format(
            post_waittime, task.host.name))
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Post-flight aborted"

    try:
        res = task.run(napalm_get, getters=["facts"])
        os_version = res[0].result['facts']['os_version']

        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.hostname == task.host.name).one()
            prev_os_version = dev.os_version
            dev.os_version = os_version
            if prev_os_version == os_version:
                logger.error(
                    "OS version did not change, activation failed on {}".
                    format(task.host.name))
                # Caught by the handler below and turned into the
                # "Post-flight failed" return string.
                raise Exception("OS version did not change, activation failed")
            else:
                # New OS version: clear the stored config hash and mark the
                # device as unsynchronized.
                dev.confhash = None
                dev.synchronized = False
    except Exception as e:
        logger.exception("Could not update OS version on device {}: {}".format(
            task.host.name, str(e)))
        return 'Post-flight failed, could not update OS version: {}'.format(
            str(e))

    return "Post-flight, OS version updated from {} to {}.".format(
        prev_os_version, os_version)
def get_interface_states(hostname) -> dict:
    """Fetch the NAPALM "interfaces" getter output for one managed device.

    Args:
        hostname: Inventory name of the device

    Returns:
        The 'interfaces' entry of the getter result

    Raises:
        ValueError: If the managed inventory does not contain exactly one
            host with this name.
        Exception: If Nornir returns no result or the getter failed.
    """
    logger = get_logger()
    inventory = cnaas_init()
    selected = inventory.filter(name=hostname).filter(managed=True)
    if len(selected.inventory) != 1:
        raise ValueError(f"Hostname {hostname} not found in inventory")

    run_result = selected.run(task=napalm_get, getters=["interfaces"])
    if len(run_result) != 1:
        raise Exception(f"Could not get interfaces for {hostname}: no Nornir result")

    host_result = run_result[hostname]
    if run_result.failed or host_result.failed:
        raise Exception("Could not get interfaces for {}, NAPALM failed: {}".format(
            hostname, host_result.exception
        ))
    return host_result[0].result['interfaces']
def renew_cert(hostname: Optional[str] = None,
               group: Optional[str] = None,
               job_id: Optional[str] = None,
               scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Renew device certificates for a single device or a group of devices.

    Args:
        hostname: Select a single device by hostname
        group: Select devices by group; only used when hostname is not set
        job_id: Job ID, passed on to renew_cert_task
        scheduled_by: Username that triggered the job (not read here)

    Returns:
        NornirJobResult wrapping the Nornir result; per-host failures are
        logged, not raised.

    Raises:
        ValueError: If neither hostname nor group is given.
        Exception: If a selected device is missing from the database or has
            an unsupported platform, or if running the task itself raises.
    """
    logger = get_logger()
    nr = cnaas_init()
    if hostname:
        nr_filtered, dev_count, _ = inventory_selector(nr, hostname=hostname)
    elif group:
        nr_filtered, dev_count, _ = inventory_selector(nr, group=group)
    else:
        raise ValueError("Neither hostname nor group specified for renew_cert")

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for renew certificate ({}): {}".format(
        dev_count, ", ".join(device_list)))

    supported_platforms = ['eos']
    # Make sure we only attempt supported devices. One session is enough for
    # the whole read-only check.
    with sqla_session() as session:
        for device in device_list:
            dev: Device = session.query(Device). \
                filter(Device.hostname == device).one_or_none()
            if not dev:
                raise Exception('Could not find device: {}'.format(device))
            if dev.platform not in supported_platforms:
                raise Exception(
                    'Unsupported device platform "{}" for device: {}'.format(
                        dev.platform, device))

    try:
        nrresult = nr_filtered.run(task=renew_cert_task, job_id=job_id)
    except Exception as e:
        logger.exception('Exception while renewing certificates: {}'.format(
            str(e)))
        # There is no result to wrap when run() itself raised, so re-raise
        # the original error.
        raise

    for failed_hostname in nrresult.failed_hosts:
        logger.error(
            "Certificate renew on device '{}' failed".format(failed_hostname))

    if nrresult.failed:
        logger.error("Not all devices got new certificates")

    return NornirJobResult(nrresult=nrresult)
def update_facts(hostname: str,
                 job_id: Optional[str] = None,
                 scheduled_by: Optional[str] = None):
    """Fetch NAPALM facts from a device and store them on its Device row.

    Args:
        hostname: Hostname of the device to update
        job_id: Job ID (not read here)
        scheduled_by: Username that triggered the job (not read here)

    Returns:
        NornirJobResult wrapping the facts result. It is also returned,
        without a database update, when the device could not be contacted.

    Raises:
        ValueError: If the device is unknown or is neither MANAGED nor
            UNMANAGED.
    """
    log = get_logger()
    allowed_states = (DeviceState.MANAGED, DeviceState.UNMANAGED)
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not dev:
            raise ValueError(
                "Device with hostname {} not found".format(hostname))
        if dev.state not in allowed_states:
            raise ValueError(
                "Device with hostname {} is in incorrect state: {}".format(
                    hostname, str(dev.state)))
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nrresult = nr.filter(name=hostname).run(task=networking.napalm_get,
                                            getters=["facts"])

    if nrresult.failed:
        log.error(
            "Could not contact device with hostname {}".format(hostname))
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.hostname == hostname).one()
            dev.serial = facts['serial_number']
            dev.vendor = facts['vendor']
            dev.model = facts['model']
            dev.os_version = facts['os_version']
            log.debug("Updating facts for device {}: {}, {}, {}, {}".format(
                hostname, facts['serial_number'], facts['vendor'],
                facts['model'], facts['os_version']))
    except Exception as e:
        log.exception(
            "Could not update device with hostname {} with new facts: {}".
            format(hostname, str(e)))
        log.debug("Get facts nrresult for hostname {}: {}".format(
            hostname, nrresult))
        raise e

    return NornirJobResult(nrresult=nrresult)
def get_evpn_spines(session, settings: dict):
    """Return the Device objects for the hostnames listed in settings['evpn_peers'].

    Entries without a valid 'hostname' are logged and skipped. Hostnames
    with no matching device in the database are silently left out.
    """
    log = get_logger()

    peer_hostnames = []
    for peer in settings['evpn_peers']:
        if 'hostname' not in peer or not Device.valid_hostname(peer['hostname']):
            log.error(
                "Invalid entry specified in settings->evpn_peers, ignoring: {}"
                .format(peer))
            continue
        peer_hostnames.append(peer['hostname'])

    found = (
        session.query(Device).filter(Device.hostname == name).one_or_none()
        for name in peer_hostnames
    )
    return [device for device in found if device]
def update_config_hash(task):
    """Nornir task: fetch the running config and store its hash for the host.

    Raises:
        Exception: If the config could not be fetched or has an unexpected
            shape (logged, then re-raised).
        ValueError: If the calculated hash is empty (logged, then re-raised).
    """
    log = get_logger()
    try:
        get_result = task.run(task=napalm_get, getters=["config"])
        usable = (
            isinstance(get_result, MultiResult)
            and len(get_result) == 1
            and isinstance(get_result[0].result, dict)
            and 'config' in get_result[0].result
        )
        if not usable:
            raise Exception("Unable to get config from device")

        running = get_result[0].result['config']['running']
        new_config_hash = calc_config_hash(task.host.name, running)
        if not new_config_hash:
            raise ValueError("Empty config hash")
    except Exception as e:
        log.exception("Unable to get config hash: {}".format(str(e)))
        raise e

    # Only reached when no exception was raised above.
    with sqla_session() as session:
        Device.set_config_hash(session, task.host.name, new_config_hash)
    log.debug("Config hash for {} updated to {}".format(task.host.name, new_config_hash))
def check_settings_syntax(settings_dict: dict, settings_metadata_dict: dict) -> dict:
    """Validate settings against f_root and return the validated dict.

    Args:
        settings_dict: Settings to validate
        settings_metadata_dict: Maps top-level setting names to their origin;
            used only to build the error message

    Returns:
        The validated settings as returned by f_root(...).dict()

    Raises:
        SettingsSyntaxError: With one message block per validation error.
    """
    logger = get_logger()
    try:
        return f_root(**settings_dict).dict()
    except ValidationError as e:
        validation_errors = e.errors()
        report = []
        for num, error in enumerate(validation_errors):
            # If there are two errors and the last one is of type none allowed
            # then skip recording the second error because it's an implication
            # of the first error (the value has to be correct or none)
            # TODO: handle multiple occurrences of this?
            implied_by_first = (
                len(validation_errors) == 2
                and num == 1
                and error['type'] == 'type_error.none.allowed'
            )
            if implied_by_first:
                continue

            # TODO: Find a way to present customised error message when string
            # regex match fails instead of just showing the regex pattern.
            loc = error['loc']
            origin = (settings_metadata_dict[loc[0]]
                      if loc[0] in settings_metadata_dict else 'unknown')
            header = "Validation error for setting {}, bad value: {} (value origin: {})\n".format(
                '->'.join(str(x) for x in loc),
                get_pydantic_error_value(settings_dict, loc), origin)

            try:
                pydantic_descr = get_pydantic_field_descr(f_root.schema(), loc)
                pydantic_descr_msg = (
                    ", field should be: {}".format(pydantic_descr)
                    if pydantic_descr else ""
                )
            except Exception as e_pydantic_descr:
                logger.exception(e_pydantic_descr)
                pydantic_descr_msg = ", exception while getting pydantic description"

            report.append(
                header + "Message: {}{}\n".format(error['msg'], pydantic_descr_msg))
        raise SettingsSyntaxError(''.join(report))
def get_mlag_ifs(session, hostname, mlag_peer_hostname) -> Dict[str, int]:
    """Returns dict with mapping of interface -> neighbor id

    The neighbor id is returned instead of its hostname since the mlag peer
    will change hostname during init.
    """
    log = get_logger()
    peer_ifs = {}

    dev = session.query(Device).filter(Device.hostname == hostname).one()
    for neighbor in dev.get_neighbors(session):
        if neighbor.hostname != mlag_peer_hostname:
            continue
        for local_if in dev.get_neighbor_local_ifnames(session, neighbor):
            peer_ifs[local_if] = neighbor.id

    described = [
        "{}: {}".format(ifname, neighbor_id)
        for ifname, neighbor_id in peer_ifs.items()
    ]
    log.debug("MLAG peer interfaces for device {} detected: {}".format(
        hostname, ', '.join(described)))
    return peer_ifs
def apply_config(hostname: str,
                 config: str,
                 dry_run: bool,
                 job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Apply a static configuration (from backup etc) to a device.

    Unless dry_run is set, the device is marked UNMANAGED and unsynchronized
    in the database before the configuration is pushed.

    Args:
        hostname: Specify a single host by hostname to synchronize
        config: Static configuration to apply
        dry_run: Set to false to actually apply config to device
        job_id: Job ID number
        scheduled_by: Username from JWT

    Returns:
        NornirJobResult

    Raises:
        Exception: If the device is unknown, is neither MANAGED nor
            UNMANAGED, or if running the push task itself raises.
    """
    logger = get_logger()

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not dev:
            raise Exception("Device {} not found".format(hostname))
        elif not (dev.state == DeviceState.MANAGED
                  or dev.state == DeviceState.UNMANAGED):
            raise Exception("Device {} is in invalid state: {}".format(
                hostname, dev.state))
        if not dry_run:
            dev.state = DeviceState.UNMANAGED
            dev.synchronized = False

    nr = cnaas_init()
    nr_filtered, _, _ = inventory_selector(nr, hostname=hostname)

    try:
        nrresult = nr_filtered.run(task=push_static_config,
                                   config=config,
                                   dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception in apply_config: {}".format(e))
        # There is no result to wrap when run() itself raised. Falling
        # through would fail on the unbound nrresult, so re-raise.
        raise

    return NornirJobResult(nrresult=nrresult)
def get_evpn_peers(session, settings: dict):
    """Return the Device objects to use as EVPN peers.

    Devices are looked up from the hostnames in settings['evpn_peers'].
    Invalid entries are logged and skipped. If that yields no devices, all
    devices of type CORE are returned instead.

    Args:
        session: Database session
        settings: Settings dict; 'evpn_peers' may be missing or empty
    """
    logger = get_logger()
    device_hostnames = []
    # A missing or None evpn_peers means "none specified", which is the case
    # the CORE fallback below exists for.
    for entry in settings.get('evpn_peers') or []:
        if 'hostname' in entry and Device.valid_hostname(entry['hostname']):
            device_hostnames.append(entry['hostname'])
        else:
            logger.error("Invalid entry specified in settings->evpn_peers, ignoring: {}".format(entry))

    ret = []
    for hostname in device_hostnames:
        dev = session.query(Device).filter(Device.hostname == hostname).one_or_none()
        if dev:
            ret.append(dev)

    # If no evpn_peers were specified return a list of all CORE devices instead
    if not ret:
        ret.extend(
            session.query(Device).filter(Device.device_type == DeviceType.CORE).all())
    return ret
def get_group_settings():
    """Read and validate global/groups.yml from the local settings repository.

    Returns:
        (validated group settings as dict, settings origin dict)

    Raises:
        VerifyPathException: If the settings repository directory structure
            fails verification (logged, then re-raised).
    """
    log = get_logger()

    with open('/etc/cnaas-nms/repository.yml', 'r') as repo_fh:
        repo_config = yaml.safe_load(repo_fh)
    settings_repo = repo_config['settings_local']

    try:
        verify_dir_structure(settings_repo, DIR_STRUCTURE)
    except VerifyPathException as e:
        log.exception(
            "Exception when verifying settings repository directory structure")
        raise e

    group_settings, group_settings_origin = read_settings(
        settings_repo, ['global', 'groups.yml'], 'global', {}, {})
    check_settings_syntax(group_settings, group_settings_origin)
    validated = f_groups(**group_settings).dict()
    return validated, group_settings_origin
def read_settings(local_repo_path: str,
                  path: List[str],
                  origin: str,
                  merged_settings,
                  merged_settings_origin,
                  groups: List[str] = None,
                  hostname: str = None) -> Tuple[dict, dict]:
    """
    Read one settings file and merge it into the already merged settings.

    Empty files and files whose top level is not a dict leave the merged
    settings unchanged. When groups or hostname is given, the file is syntax
    checked on its own and then filtered before merging.

    Args:
        local_repo_path: Local path to settings repository
        path: Path to look for inside repo
        origin: Name to record as the origin of these settings
        merged_settings: Existing settings
        merged_settings_origin: Existing settings origin info
        groups: Optional list of groups to filter on (using filter_yamldata)
        hostname: Optional hostname to filter on (using filter_yamldata)

    Returns:
        merged_settings, merged_settings_origin
    """
    log = get_logger()
    filename = get_setting_filename(local_repo_path, path)
    file_data = read_settings_file(filename)

    if not file_data:
        return merged_settings, merged_settings_origin
    if not isinstance(file_data, dict):
        log.info("Invalid yaml file ignored: {}".format(filename))
        return merged_settings, merged_settings_origin

    new_settings: dict = file_data
    needs_filtering = groups or hostname
    if needs_filtering:
        unfiltered, unfiltered_origin = merge_dict_origin(
            {}, new_settings, {}, origin)
        check_settings_syntax(unfiltered, unfiltered_origin)
        new_settings = filter_yamldata(new_settings, groups, hostname)

    return merge_dict_origin(merged_settings, new_settings,
                             merged_settings_origin, origin)
def arista_device_reboot(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task to reboot a single device.

    Sends 'enable', 'write' and 'reload force' through netmiko, in that
    order, unless the job has been aborted.

    Args:
        task: NorNir task.
        job_id: Job ID, passed to set_thread_data and used for the abort check

    Returns:
        String, describing the result
    """
    set_thread_data(job_id)
    log = get_logger()
    with sqla_session() as session:
        aborted = Job.check_job_abort_status(session, job_id)
        if aborted:
            return "Reboot aborted"

    reboot_steps = (
        {'command_string': 'enable', 'expect_string': '.*#'},
        {'command_string': 'write', 'expect_string': '.*#'},
        {'command_string': 'reload force', 'max_loops': 2, 'expect_string': '.*'},
    )
    try:
        for step_kwargs in reboot_steps:
            task.run(netmiko_send_command, **step_kwargs)
    except Exception as e:
        log.exception('Failed to reboot switch {}: {}'.format(
            task.host.name, str(e)))
        raise e

    return "Device reboot done."
import cnaas_nms.confpush.nornir_helper

from cnaas_nms.tools.log import get_logger
from cnaas_nms.scheduler.wrapper import job_wrapper
from cnaas_nms.confpush.nornir_helper import NornirJobResult
from cnaas_nms.db.session import sqla_session
from cnaas_nms.db.device import DeviceType, Device
from nornir_netmiko.tasks import netmiko_send_command
from nornir_utils.plugins.functions import print_result

logger = get_logger()


def device_erase_task(task, hostname: str) -> str:
    """Nornir task that sends 'enable' and 'write erase now' via netmiko.

    Args:
        task: Nornir task
        hostname: Not read by this task; commands are sent to task.host

    Returns:
        String, describing the result

    Raises:
        Exception: If any command fails. The failure is logged and the
            original error is chained as the cause.
    """
    try:
        task.run(netmiko_send_command,
                 command_string='enable',
                 expect_string='.*#',
                 name='Enable')
        res = task.run(netmiko_send_command,
                       command_string='write erase now',
                       expect_string='.*#',
                       name='Write rase')
        print_result(res)
    except Exception as e:
        logger.info('Failed to factory default device {}, reason: {}'.format(
            task.host.name, e))
        raise Exception('Factory default device') from e

    # The signature promises a str; previously the task returned None.
    return "Device {} factory defaulted".format(task.host.name)
def discover_device(ztp_mac: str,
                    dhcp_ip: str,
                    iteration: int,
                    job_id: Optional[str] = None,
                    scheduled_by: Optional[str] = None):
    """Scan a device in DHCP_BOOT state and move it to DISCOVERED.

    If the device cannot be contacted, a new discovery attempt is scheduled
    through schedule_discover_device with iteration + 1.

    Args:
        ztp_mac: ZTP MAC address identifying the device
        dhcp_ip: Current DHCP IP of the device; stored if it changed
        iteration: Attempt number, used in log messages and rescheduling
        job_id: Job ID (not read here)
        scheduled_by: Username passed on when rescheduling

    Returns:
        NornirJobResult wrapping the facts result, with next_job_id set when
        a retry was scheduled.

    Raises:
        ValueError: If the device is unknown or not in DHCP_BOOT state.
    """
    logger = get_logger()
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.ztp_mac == ztp_mac).one_or_none()
        if not dev:
            raise ValueError(
                "Device with ztp_mac {} not found".format(ztp_mac))
        if dev.state != DeviceState.DHCP_BOOT:
            raise ValueError(
                "Device with ztp_mac {} is in incorrect state: {}".format(
                    ztp_mac, str(dev.state)))
        if str(dev.dhcp_ip) != dhcp_ip:
            dev.dhcp_ip = dhcp_ip
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        logger.info(
            "Could not contact device with ztp_mac {} (attempt {})".format(
                ztp_mac, iteration))
        next_job_id = schedule_discover_device(ztp_mac, dhcp_ip,
                                               iteration + 1, scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.ztp_mac == ztp_mac).one()
            # Each stored fact is cut to at most 64 characters.
            dev.serial = facts['serial_number'][:64]
            dev.vendor = facts['vendor'][:64]
            dev.model = facts['model'][:64]
            dev.os_version = facts['os_version'][:64]
            dev.state = DeviceState.DISCOVERED
            new_hostname = dev.hostname
            logger.info(f"Device with ztp_mac {ztp_mac} successfully scanned "
                        f"(attempt {iteration}), moving to DISCOVERED state")
    except Exception as e:
        logger.exception(
            "Could not update device with ztp_mac {} with new facts: {}".
            format(ztp_mac, str(e)))
        logger.debug("nrresult for ztp_mac {}: {}".format(ztp_mac, nrresult))
        raise

    nrresult_hostname = nr_filtered.run(task=set_hostname_task,
                                        new_hostname=new_hostname)
    if nrresult_hostname.failed:
        logger.info("Could not set hostname for ztp_mac: {}".format(ztp_mac))

    return NornirJobResult(nrresult=nrresult)
def init_device_step2(device_id: int,
                      iteration: int = -1,
                      job_id: Optional[str] = None,
                      scheduled_by: Optional[str] = None) -> \
        NornirJobResult:
    """Second step of device init: verify the device and mark it MANAGED.

    Fetches facts from the device. If it cannot be contacted, a retry is
    scheduled through schedule_init_device_step2. Otherwise the reported
    hostname is checked, the device is set to MANAGED and the
    new_managed_device plugin hook is called.

    Args:
        device_id: Database ID of the device
        iteration: Passed on to schedule_init_device_step2 on retry
        job_id: Job ID (not read here)
        scheduled_by: Username passed on when rescheduling

    Returns:
        NornirJobResult wrapping the facts result, with next_job_id set when
        a retry was scheduled.

    Raises:
        DeviceStateException: If the device is not in state INIT.
        InitError: If facts are missing from the result or the device
            reports a different hostname.
    """
    logger = get_logger()
    # step4+ in apjob: if success, update management ip and device state, trigger external stuff?
    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        if dev.state != DeviceState.INIT:
            logger.error("Device with ID {} got to init step2 but is in incorrect state: {}".
                         format(device_id, dev.state.name))
            raise DeviceStateException(
                "Device must be in state INIT to continue init step 2")
        hostname = dev.hostname
        devtype: DeviceType = dev.device_type
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        next_job_id = schedule_init_device_step2(device_id, iteration,
                                                 scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        found_hostname = facts['hostname']
    except Exception as e:
        # A bare except would also swallow KeyboardInterrupt/SystemExit.
        raise InitError("Could not log in to device during init step 2") from e
    if hostname != found_hostname:
        raise InitError("Newly initialized device presents wrong hostname")

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        dev.state = DeviceState.MANAGED
        dev.synchronized = False
        set_facts(dev, facts)
        management_ip = dev.management_ip
        dev.dhcp_ip = None

    # Plugin hook: new managed device
    # Send: hostname , device type , serial , platform , vendor , model , os version
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.new_managed_device(hostname=hostname,
                                       device_type=devtype.name,
                                       serial_number=facts['serial_number'],
                                       vendor=facts['vendor'],
                                       model=facts['model'],
                                       os_version=facts['os_version'],
                                       management_ip=str(management_ip))
    except Exception as e:
        # Plugin failures are logged only and never fail the init. The
        # placeholder was missing, so the error text was dropped.
        logger.exception(
            "Error while running plugin hooks for new_managed_device: {}".format(
                str(e)))

    return NornirJobResult(nrresult=nrresult)
def init_fabric_device_step1(
        device_id: int,
        new_hostname: str,
        device_type: str,
        neighbors: Optional[List[str]] = None,
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize fabric (CORE/DIST) device for management by CNaaS-NMS.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        device_type: String representing DeviceType
        neighbors: Optional list of hostnames of peer devices; None is
            treated as an empty list
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError
    """
    logger = get_logger()
    # Avoid a shared mutable default: build a fresh list per call.
    if neighbors is None:
        neighbors = []

    if DeviceType.has_name(device_type):
        devtype = DeviceType[device_type]
    else:
        raise ValueError("Invalid 'device_type' provided")

    if devtype not in [DeviceType.CORE, DeviceType.DIST]:
        raise ValueError(
            "Init fabric device requires device type DIST or CORE")

    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # Test update of linknets using LLDP data
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=True)

        # Any exception from the neighbor checks propagates to the caller
        # before device state or linknets are changed.
        verified_neighbors = pre_init_check_neighbors(
            session, dev, devtype, linknets, neighbors)
        logger.debug("Found valid neighbors for INIT of {}: {}".format(
            new_hostname, ", ".join(verified_neighbors)))
        check_neighbor_sync(session, verified_neighbors)

        dev.state = DeviceState.INIT
        dev.device_type = devtype
        session.commit()
        # If neighbor check works, commit new linknets
        # This will also mark neighbors as unsynced
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=False)
        logger.debug("New linknets for INIT of {} created: {}".format(
            new_hostname, linknets))

        # Select and reserve a new management and infra IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()

        mgmt_ip = cnaas_nms.confpush.underlay.find_free_mgmt_lo_ip(session)
        infra_ip = cnaas_nms.confpush.underlay.find_free_infra_ip(session)

        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        dev.infra_ip = infra_ip
        session.commit()

        mgmt_variables = {
            'mgmt_ipif': str(IPv4Interface('{}/32'.format(mgmt_ip))),
            'mgmt_prefixlen': 32,
            'infra_ipif': str(IPv4Interface('{}/32'.format(infra_ip))),
            'infra_ip': str(infra_ip),
        }

        device_variables = populate_device_vars(session, dev, new_hostname,
                                                devtype)
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # TODO: certificate

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=devtype,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = mgmt_ip
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=None,
                                   hostname=hostname)
    except Exception as e:
        # Plugin failures are logged only. The placeholder was missing, so
        # the error text was dropped.
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # step3. resync neighbors
    scheduler = Scheduler()
    sync_nei_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        scheduled_by=scheduled_by,
        kwargs={
            'hostnames': verified_neighbors,
            'dry_run': False
        })
    logger.info(f"Scheduled job {sync_nei_job_id} to resynchronize neighbors")

    # step4. register apscheduler job that continues steps
    scheduler = Scheduler()
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=60,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def push_base_management(task, device_variables: dict, devtype: DeviceType, job_id):
    """Render the initial device config from the template repo and push it.

    The template entrypoint is looked up in the platform's mapping.yml inside
    the local template repository configured in
    /etc/cnaas-nms/repository.yml. Before rendering, an attempt is made to
    install a device certificate via the ztp_device_cert task.

    Args:
        task: Nornir task for the device being initialized
        device_variables: Variables passed as keyword arguments to the
            template rendering task; the 'mgmt_ip' and 'host' keys are
            also read directly by this function
        devtype: Device type, its name selects the entry in mapping.yml
        job_id: Job ID passed to set_thread_data and to ztp_device_cert

    Raises:
        RepoStructureException: mapping.yml does not exist for the platform
        InitError: The config push raised an exception, but the follow-up
            facts query against the device did not fail
        Exception: The certificate task result is marked failed and
            device_cert_required() returns a true value
    """
    set_thread_data(job_id)
    logger = get_logger()
    logger.debug("Push basetemplate for host: {}".format(task.host.name))

    # Locate the local checkout of the template repository
    with open('/etc/cnaas-nms/repository.yml', 'r') as repo_file:
        local_repo_path = yaml.safe_load(repo_file)['templates_local']

    mapfile = os.path.join(local_repo_path, task.host.platform, 'mapping.yml')
    if not os.path.isfile(mapfile):
        raise RepoStructureException(
            "File {} not found in template repo".format(mapfile))
    with open(mapfile, 'r') as map_file:
        template = yaml.safe_load(map_file)[devtype.name]['entrypoint']

    # TODO: install device certificate, using new hostname and reserved IP.
    #       exception on fail if tls_verify!=False
    try:
        cert_result = task.run(task=ztp_device_cert,
                               job_id=job_id,
                               new_hostname=task.host.name,
                               management_ip=device_variables['mgmt_ip'])
    # TODO: handle exception from ztp_device_cert -> arista_copy_cert
    except Exception as exc:
        # Exceptions from the certificate task are logged but do not abort
        logger.exception(exc)
    else:
        if cert_result.failed:
            if not device_cert_required():
                logger.debug(
                    "Unable to install device certificate for {}".format(
                        device_variables['host']))
            else:
                logger.error(
                    "Unable to install device certificate for {}, aborting".
                    format(device_variables['host']))
                raise Exception(cert_result[0].exception)

    rendered = task.run(task=template_file,
                        name="Generate initial device config",
                        template=template,
                        jinja_env=cnaas_jinja_env,
                        path=f"{local_repo_path}/{task.host.platform}",
                        **device_variables)
    # TODO: Handle template not found, variables not defined
    task.host["config"] = rendered.result

    # Use extra low timeout for this since we expect to lose connectivity
    # after changing IP
    task.host.connection_options["napalm"].extras["timeout"] = 30

    try:
        task.run(task=napalm_configure,
                 name="Push base management config",
                 replace=True,
                 configuration=task.host["config"],
                 dry_run=False)
    except Exception:
        # An exception is the expected outcome here. If the device can still
        # be queried afterwards, the new config was not committed.
        task.run(task=napalm_get, getters=["facts"])
        facts_result = task.results[-1]
        if not facts_result.failed:
            raise InitError(
                "Device {} did not commit new base management config".format(
                    task.host.name))
def init_access_device_step1(
        device_id: int,
        new_hostname: str,
        mlag_peer_id: Optional[int] = None,
        mlag_peer_new_hostname: Optional[str] = None,
        uplink_hostnames_arg: Optional[List[str]] = None,
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS.
    If a MLAG/MC-LAG pair is to be configured both mlag_peer_id and
    mlag_peer_new_hostname must be set.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        mlag_peer_id: Device ID of MLAG peer device (optional)
        mlag_peer_new_hostname: Hostname to configure on peer device (optional)
        uplink_hostnames_arg: List of hostnames of uplink peer devices (optional)
                              Used when initializing MLAG peer device.
                              None or an empty list means "not specified".
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError: Only one of mlag_peer_id and mlag_peer_new_hostname
            was specified
        Exception: No management domain or no free management IP was found
    """
    logger = get_logger()
    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # update linknets using LLDP data
        update_linknets(session, dev.hostname, DeviceType.ACCESS)

        # If this is the first device in an MLAG pair
        if mlag_peer_id and mlag_peer_new_hostname:
            mlag_peer_dev = pre_init_checks(session, mlag_peer_id)
            update_linknets(session, mlag_peer_dev.hostname, DeviceType.ACCESS)
            update_interfacedb_worker(
                session, dev, replace=True, delete_all=False,
                mlag_peer_hostname=mlag_peer_dev.hostname)
            update_interfacedb_worker(
                session, mlag_peer_dev, replace=True, delete_all=False,
                mlag_peer_hostname=dev.hostname)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)
            uplink_hostnames += mlag_peer_dev.get_uplink_peer_hostnames(
                session)
            # check that both devices see the correct MLAG peer
            pre_init_check_mlag(session, dev, mlag_peer_dev)
            pre_init_check_mlag(session, mlag_peer_dev, dev)
        # If this is the second device in an MLAG pair
        elif uplink_hostnames_arg:
            uplink_hostnames = uplink_hostnames_arg
        elif mlag_peer_id or mlag_peer_new_hostname:
            raise ValueError(
                "mlag_peer_id and mlag_peer_new_hostname must be specified together"
            )
        # If this device is not part of an MLAG pair
        else:
            update_interfacedb_worker(session, dev, replace=True,
                                      delete_all=False)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)

        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, uplink_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(uplink_hostnames))
        # Select a new management IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()

        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}/{}"
                .format(mgmtdomain.id, mgmtdomain.description))
        # Reserve the IP until it has been stored on the device itself
        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        # Populate variables for template rendering
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        mgmt_variables = {
            'mgmt_ipif': str(
                IPv4Interface('{}/{}'.format(
                    mgmt_ip, mgmt_gw_ipif.network.prefixlen))),
            'mgmt_ip': str(mgmt_ip),
            'mgmt_prefixlen': int(mgmt_gw_ipif.network.prefixlen),
            'mgmt_vlan_id': mgmtdomain.vlan,
            'mgmt_gw': mgmt_gw_ipif.ip,
        }
        device_variables = populate_device_vars(session, dev, new_hostname,
                                                DeviceType.ACCESS)
        # Management variables take precedence over generic device variables
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=DeviceType.ACCESS,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ip']
        dev.state = DeviceState.INIT
        dev.device_type = DeviceType.ACCESS
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP. Best effort: a failing plugin must not
    # abort the initialization, so the error is only logged.
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=str(mgmt_gw_ipif.network),
                                   hostname=hostname)
    except Exception as e:
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # step3. register apscheduler job that continues steps
    if mlag_peer_id and mlag_peer_new_hostname:
        # account for delayed start of peer device plus mgmt timeout
        step2_delay = 30 + 60 + 30
    else:
        step2_delay = 30
    scheduler = Scheduler()
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=step2_delay,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    if mlag_peer_id and mlag_peer_new_hostname:
        # Schedule initialization of the MLAG peer, handing over the uplinks
        # already discovered for the pair
        mlag_peer_job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.init_device:init_access_device_step1',
            when=60,
            scheduled_by=scheduled_by,
            kwargs={
                'device_id': mlag_peer_id,
                'new_hostname': mlag_peer_new_hostname,
                'uplink_hostnames_arg': uplink_hostnames,
                'scheduled_by': scheduled_by
            })
        logger.info("MLAG peer (id {}) init scheduled as job # {}".format(
            mlag_peer_id, mlag_peer_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def pre_init_check_neighbors(
        session,
        dev: Device,
        devtype: DeviceType,
        linknets: List[dict],
        expected_neighbors: Optional[List[str]] = None,
        mlag_peer_dev: Optional[Device] = None) -> List[str]:
    """Check for compatible neighbors

    Args:
        session: SQLAlchemy session
        dev: Device object to check
        devtype: The target device type (not the same as current during init)
        linknets: List of linknets to check for compatible neighbors
        expected_neighbors: Optional list to manually specify neighbors.
            An explicitly empty list skips all neighbor checks.
        mlag_peer_dev: Optional MLAG peer device; for access devices,
            linknets belonging to the peer are also accepted

    Returns:
        List of compatible neighbor hostnames

    Raises:
        InitVerificationError: No management domain could be found for the
            uplinks (access), or the number of verified neighbors does not
            match expectations (core/dist)
        Exception: No linknets given, a linknet involves neither this
            device nor its MLAG peer, or a neighbor is not in the database
    """
    logger = get_logger()
    verified_neighbors = []
    if expected_neighbors is not None and len(expected_neighbors) == 0:
        logger.debug(
            "expected_neighbors explicitly set to empty list, skipping neighbor checks"
        )
        return []
    if not linknets:
        raise Exception("No linknets were specified to check_neighbors")

    if devtype == DeviceType.ACCESS:
        neighbors = []
        uplinks = []
        for linknet in linknets:
            host_a = linknet['device_a_hostname']
            host_b = linknet['device_b_hostname']
            if host_a == host_b:
                continue  # don't add loopback cables as neighbors
            elif host_a == dev.hostname:
                if mlag_peer_dev and host_b == mlag_peer_dev.hostname:
                    # only add mlag peer linknet in one direction to avoid
                    # duplicate
                    continue
                neighbor = host_b
            elif host_b == dev.hostname:
                neighbor = host_a
            elif mlag_peer_dev:
                if host_a == mlag_peer_dev.hostname:
                    neighbor = host_b
                elif host_b == mlag_peer_dev.hostname:
                    neighbor = host_a
                else:
                    # Linknet belongs to neither this device nor its MLAG
                    # peer; fail instead of reusing a stale/unbound neighbor
                    raise Exception("Own hostname not found in linknet")
            else:
                raise Exception("Own hostname not found in linknet")

            neighbor_dev: Device = session.query(Device).\
                filter(Device.hostname == neighbor).one_or_none()
            if not neighbor_dev:
                raise Exception(
                    "Neighbor device {} not found in database".format(
                        neighbor))
            # Only access and dist neighbors count as uplink candidates
            if neighbor_dev.device_type in [
                    DeviceType.ACCESS, DeviceType.DIST
            ]:
                uplinks.append(neighbor)

            neighbors.append(neighbor)
        try:
            cnaas_nms.db.helper.find_mgmtdomain(session, uplinks)
        except Exception as e:
            raise InitVerificationError(str(e)) from e
        else:
            verified_neighbors = neighbors
    elif devtype in [DeviceType.CORE, DeviceType.DIST]:
        # A core device accepts dist neighbors and a dist device accepts
        # core neighbors
        if devtype == DeviceType.CORE:
            compatible_type = DeviceType.DIST
        else:
            compatible_type = DeviceType.CORE

        for linknet in linknets:
            if linknet['device_a_hostname'] == dev.hostname:
                neighbor = linknet['device_b_hostname']
            elif linknet['device_b_hostname'] == dev.hostname:
                neighbor = linknet['device_a_hostname']
            else:
                raise Exception("Own hostname not found in linknet")
            if expected_neighbors:
                if neighbor in expected_neighbors:
                    verified_neighbors.append(neighbor)
                # Neighbor was explicitly set -> skip verification of neighbor devtype
                continue

            neighbor_dev: Device = session.query(Device).\
                filter(Device.hostname == neighbor).one_or_none()
            if not neighbor_dev:
                raise Exception(
                    "Neighbor device {} not found in database".format(
                        neighbor))
            if neighbor_dev.device_type == compatible_type:
                verified_neighbors.append(neighbor)
            else:
                logger.warning(
                    "Neighbor device {} is of unexpected device type {}, ignoring"
                    .format(neighbor, neighbor_dev.device_type.name))

        if expected_neighbors:
            if len(expected_neighbors) != len(verified_neighbors):
                raise InitVerificationError(
                    "Not all expected neighbors were detected")
        else:
            if len(verified_neighbors) < 2:
                raise InitVerificationError(
                    "Not enough compatible neighbors ({} of 2) were detected".
                    format(len(verified_neighbors)))
    return verified_neighbors