def device_upgrade(download: Optional[bool] = False,
                   activate: Optional[bool] = False,
                   filename: Optional[str] = None,
                   group: Optional[str] = None,
                   hostname: Optional[str] = None,
                   url: Optional[str] = None,
                   job_id: Optional[str] = None,
                   pre_flight: Optional[bool] = False,
                   reboot: Optional[bool] = False,
                   scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Run device_upgrade_task on a single device or on a group of devices.

    Devices are selected by hostname if given, otherwise by group. Every
    selected device must exist in the database and have platform 'eos',
    otherwise the whole job is aborted before any task is started.

    Args:
        download: Forwarded unchanged to device_upgrade_task
        activate: Forwarded unchanged to device_upgrade_task
        filename: Forwarded unchanged to device_upgrade_task
        group: Group of devices to select (used only if hostname is unset)
        hostname: Hostname of the device to select
        url: Forwarded unchanged to device_upgrade_task
        job_id: Forwarded unchanged to device_upgrade_task
        pre_flight: Forwarded unchanged to device_upgrade_task
        reboot: Forwarded unchanged to device_upgrade_task
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the result of the upgrade task run

    Raises:
        ValueError: If neither hostname nor group is specified
        Exception: If a selected device is missing from the database or
            has a platform other than 'eos'
    """
    nr = cnaas_init()
    if hostname:
        nr_filtered, dev_count, _ = inventory_selector(nr, hostname=hostname)
    elif group:
        nr_filtered, dev_count, _ = inventory_selector(nr, group=group)
    else:
        raise ValueError(
            "Neither hostname nor group specified for device_upgrade")

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for firmware upgrade ({}): {}".format(
        dev_count, ", ".join(device_list)))

    # Refuse the whole job unless every selected device is a known 'eos' device
    for device in device_list:
        with sqla_session() as session:
            dev: Device = session.query(Device).\
                filter(Device.hostname == device).one_or_none()
            if not dev:
                raise Exception('Could not find device: {}'.format(device))
            if dev.platform != 'eos':
                raise Exception(
                    'Invalid device platform "{}" for device: {}'.format(
                        dev.platform, device))

    # Bind the name before the try block: if run() itself raises, the except
    # branch below would otherwise fail with UnboundLocalError.
    # NOTE(review): assumes NornirJobResult accepts nrresult=None - confirm.
    nrresult = None

    # Start tasks to take care of the upgrade
    try:
        nrresult = nr_filtered.run(task=device_upgrade_task,
                                   job_id=job_id,
                                   download=download,
                                   filename=filename,
                                   url=url,
                                   pre_flight=pre_flight,
                                   reboot=reboot,
                                   activate=activate)
        print_result(nrresult)
    except Exception as e:
        logger.exception('Exception while upgrading devices: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    # Separate loop name so the 'hostname' parameter is not overwritten
    for failed_hostname in list(nrresult.failed_hosts.keys()):
        logger.error("Firmware upgrade of device '{}' failed".format(
            failed_hostname))

    if nrresult.failed:
        logger.error("Not all devices were successfully upgraded")

    return NornirJobResult(nrresult=nrresult)
def device_erase(device_id: int = None, job_id: int = None) -> NornirJobResult:
    """Run device_erase_task on an access device and remove it from the database.

    The device row is deleted only when the task run reports no failed hosts.

    Args:
        device_id: Database ID of the device to erase
        job_id: Not used inside this function

    Returns:
        NornirJobResult wrapping the result of the erase task run

    Raises:
        Exception: If no device with the given ID exists, or if the device
            is not of type DeviceType.ACCESS
    """
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one_or_none()
        if dev:
            hostname = dev.hostname
            device_type = dev.device_type
        else:
            raise Exception('Could not find a device with ID {}'.format(
                device_id))

    if device_type != DeviceType.ACCESS:
        raise Exception('Can only do factory default on access')

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname).filter(managed=True)

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device selected: {}".format(
        device_list
    ))

    # Bind the name before the try block: if run() itself raises, the except
    # branch below would otherwise fail with UnboundLocalError.
    # NOTE(review): assumes NornirJobResult accepts nrresult=None - confirm.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=device_erase_task,
                                   hostname=hostname)
        print_result(nrresult)
    except Exception as e:
        logger.exception('Exception while erasing device: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())
    for failed_hostname in failed_hosts:
        logger.error("Failed to factory default device '{}' failed".format(
            failed_hostname))

    if nrresult.failed:
        logger.error("Factory default failed")

    # NOTE(review): an empty inventory selection also yields no failed hosts,
    # so the row is deleted even if no device was actually contacted - confirm
    # that this is intended.
    if not failed_hosts:
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.id == device_id).one_or_none()
            # The row may have disappeared since the first lookup
            if dev is not None:
                session.delete(dev)
                session.commit()

    return NornirJobResult(nrresult=nrresult)
def sync_basetemplate(hostname: Optional[str]=None,
                      device_type: Optional[DeviceType]=None,
                      dry_run: bool=True) -> NornirJobResult:
    """Run the push_basetemplate task on one device or on a device type group.

    Args:
        hostname: Hostname of a single device to sync; takes precedence
            over device_type when it is a non-empty string
        device_type: Device type whose group 'T_<value>' should be synced
        dry_run: Accepted for interface compatibility; this function does
            not forward it to the push_basetemplate task

    Returns:
        NornirJobResult wrapping the result of the task run

    Raises:
        ValueError: If neither a usable hostname nor device_type is given
    """
    inventory = cnaas_nms.confpush.nornir_helper.cnaas_init()

    if isinstance(hostname, str) and hostname:
        targets = inventory.filter(name=hostname)
    elif isinstance(device_type, DeviceType) and device_type:
        # Device type groups are named with a 'T_' prefix
        targets = inventory.filter(
            F(groups__contains='T_' + device_type.value))
    else:
        raise ValueError("hostname or device_type must be specified")

    return NornirJobResult(nrresult=targets.run(task=push_basetemplate))
def renew_cert(hostname: Optional[str] = None,
               group: Optional[str] = None,
               job_id: Optional[str] = None,
               scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Run renew_cert_task on a single device or on a group of devices.

    Devices are selected by hostname if given, otherwise by group. Every
    selected device must exist in the database and have a supported
    platform (currently only 'eos'), otherwise the job is aborted before
    any task is started.

    Args:
        hostname: Hostname of the device to select
        group: Group of devices to select (used only if hostname is unset)
        job_id: Forwarded unchanged to renew_cert_task
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the result of the renew task run

    Raises:
        ValueError: If neither hostname nor group is specified
        Exception: If a selected device is missing from the database or
            has an unsupported platform
    """
    logger = get_logger()
    nr = cnaas_init()
    if hostname:
        nr_filtered, dev_count, _ = inventory_selector(nr, hostname=hostname)
    elif group:
        nr_filtered, dev_count, _ = inventory_selector(nr, group=group)
    else:
        raise ValueError("Neither hostname nor group specified for renew_cert")

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for renew certificate ({}): {}".format(
        dev_count, ", ".join(device_list)))

    supported_platforms = ['eos']
    # Make sure we only attempt supported devices
    for device in device_list:
        with sqla_session() as session:
            dev: Device = session.query(Device).\
                filter(Device.hostname == device).one_or_none()
            if not dev:
                raise Exception('Could not find device: {}'.format(device))
            if dev.platform not in supported_platforms:
                raise Exception(
                    'Unsupported device platform "{}" for device: {}'.format(
                        dev.platform, device))

    # Bind the name before the try block: if run() raises, the except branch
    # below would otherwise fail with UnboundLocalError.
    # NOTE(review): assumes NornirJobResult accepts nrresult=None - confirm.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=renew_cert_task, job_id=job_id)
    except Exception as e:
        logger.exception('Exception while renewing certificates: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    # Separate loop name so the 'hostname' parameter is not overwritten
    for failed_hostname in list(nrresult.failed_hosts.keys()):
        logger.error(
            "Certificate renew on device '{}' failed".format(failed_hostname))

    if nrresult.failed:
        logger.error("Not all devices got new certificates")

    return NornirJobResult(nrresult=nrresult)
def update_facts(hostname: str,
                 job_id: Optional[str] = None,
                 scheduled_by: Optional[str] = None):
    """Fetch NAPALM facts from a device and store them on its database row.

    Args:
        hostname: Hostname of the device to update
        job_id: Not used inside this function
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the result of the facts getter run; also
        returned (without touching the database) if the getter run failed

    Raises:
        ValueError: If the device does not exist, or is in a state other
            than MANAGED or UNMANAGED
        Exception: Any error while reading the facts or updating the
            database row is logged and re-raised
    """
    logger = get_logger()

    with sqla_session() as session:
        device: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not device:
            raise ValueError(
                "Device with hostname {} not found".format(hostname))
        if (device.state != DeviceState.MANAGED
                and device.state != DeviceState.UNMANAGED):
            raise ValueError(
                "Device with hostname {} is in incorrect state: {}".format(
                    hostname, str(device.state)))
        # Continue with the hostname as stored in the database
        hostname = device.hostname

    inventory = cnaas_nms.confpush.nornir_helper.cnaas_init()
    target = inventory.filter(name=hostname)
    facts_result = target.run(task=networking.napalm_get, getters=["facts"])

    if facts_result.failed:
        logger.error(
            "Could not contact device with hostname {}".format(hostname))
        return NornirJobResult(nrresult=facts_result)

    # Device attribute -> key in the NAPALM facts dict, in assignment order
    attribute_sources = (
        ('serial', 'serial_number'),
        ('vendor', 'vendor'),
        ('model', 'model'),
        ('os_version', 'os_version'),
    )
    try:
        facts = facts_result[hostname][0].result['facts']
        with sqla_session() as session:
            device: Device = session.query(Device).filter(
                Device.hostname == hostname).one()
            for attribute, facts_key in attribute_sources:
                setattr(device, attribute, facts[facts_key])
            logger.debug("Updating facts for device {}: {}, {}, {}, {}".format(
                hostname, facts['serial_number'], facts['vendor'],
                facts['model'], facts['os_version']))
    except Exception as e:
        logger.exception(
            "Could not update device with hostname {} with new facts: {}".
            format(hostname, str(e)))
        logger.debug("Get facts nrresult for hostname {}: {}".format(
            hostname, facts_result))
        raise e

    return NornirJobResult(nrresult=facts_result)
def init_access_device_step2(device_id: int, iteration: int = -1) -> NornirJobResult:
    """Verify a device in state INIT and mark it as a managed access device.

    Fetches NAPALM facts from the device. If the device cannot be reached,
    a retry is requested via schedule_init_access_device_step2. If the
    device answers with the expected hostname it is set to state MANAGED
    with type ACCESS and marked unsynchronized, and its interface database
    is refreshed.

    Args:
        device_id: Database ID of the device being initialized
        iteration: Forwarded to schedule_init_access_device_step2 when the
            device could not be contacted

    Returns:
        NornirJobResult wrapping the facts getter result, with next_job_id
        set if a retry job was scheduled

    Raises:
        DeviceStateException: If the device is not in state INIT
        InitError: If facts could not be read from the result, or the
            device reports a hostname other than the expected one
    """
    # step4+ in apjob: if success, update management ip and device state, trigger external stuff?
    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        if dev.state != DeviceState.INIT:
            logger.error("Device with ID {} got to init step2 but is in incorrect state: {}".
                         format(device_id, dev.state.name))
            raise DeviceStateException(
                "Device must be in state INIT to continue init step 2")
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=networking.napalm_get, getters=["facts"])

    if nrresult.failed:
        next_job_id = schedule_init_access_device_step2(device_id, iteration)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        found_hostname = facts['hostname']
    except Exception as e:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not
        # converted into InitError; the original cause is kept via chaining.
        raise InitError(
            "Could not log in to device during init step 2") from e

    if hostname != found_hostname:
        raise InitError("Newly initialized device presents wrong hostname")

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        dev.state = DeviceState.MANAGED
        dev.device_type = DeviceType.ACCESS
        dev.synchronized = False
        # TODO: remove dhcp_ip ?

    # Best effort: a failure here is logged but does not fail the init job
    try:
        update_interfacedb(hostname, replace=True)
    except Exception as e:
        logger.exception(
            "Exception while updating interface database for device {}: {}".
            format(hostname, str(e)))

    return NornirJobResult(nrresult=nrresult)
def apply_config(hostname: str, config: str, dry_run: bool,
                 job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Apply a static configuration (from backup etc) to a device.

    On a live run (dry_run false) the device is set to state UNMANAGED and
    marked unsynchronized in the database before the config is pushed.

    Args:
        hostname: Specify a single host by hostname to synchronize
        config: Static configuration to apply
        dry_run: Set to false to actually apply config to device
        job_id: Job ID number
        scheduled_by: Username from JWT

    Returns:
        NornirJobResult wrapping the result of the push_static_config run.
        If the run raised, the exception is logged and the wrapped result
        is None.

    Raises:
        Exception: If the device does not exist, or is in a state other
            than MANAGED or UNMANAGED
    """
    logger = get_logger()

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not dev:
            raise Exception("Device {} not found".format(hostname))
        elif not (dev.state == DeviceState.MANAGED
                  or dev.state == DeviceState.UNMANAGED):
            raise Exception("Device {} is in invalid state: {}".format(
                hostname, dev.state))
        if not dry_run:
            dev.state = DeviceState.UNMANAGED
            dev.synchronized = False

    nr = cnaas_init()
    nr_filtered, _, _ = inventory_selector(nr, hostname=hostname)

    # Bind the name before the try block: the exception is only logged, so
    # the return statement below would otherwise hit an unbound local.
    # NOTE(review): assumes NornirJobResult accepts nrresult=None - confirm.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_static_config,
                                   config=config,
                                   dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception in apply_config: {}".format(e))

    return NornirJobResult(nrresult=nrresult)
def discover_device(ztp_mac: str, dhcp_ip: str, iteration: int,
                    job_id: Optional[str] = None,
                    scheduled_by: Optional[str] = None):
    """Scan a device in state DHCP_BOOT and move it to state DISCOVERED.

    Fetches NAPALM facts from the device. If the device cannot be reached,
    a retry is requested via schedule_discover_device with iteration + 1.
    On success the facts are stored on the device row (each truncated to
    64 characters), the state is set to DISCOVERED and set_hostname_task
    is run with the hostname stored in the database.

    Args:
        ztp_mac: ZTP MAC address identifying the device row
        dhcp_ip: DHCP IP of the device; stored on the row if it differs
        iteration: Attempt counter, used for logging and rescheduling
        job_id: Not used inside this function
        scheduled_by: Forwarded to schedule_discover_device on retry

    Returns:
        NornirJobResult wrapping the facts getter result, with next_job_id
        set if a retry job was scheduled

    Raises:
        ValueError: If the device does not exist or is not in state
            DHCP_BOOT
        Exception: Any error while reading the facts or updating the
            database row is logged and re-raised
    """
    logger = get_logger()
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.ztp_mac == ztp_mac).one_or_none()
        if not dev:
            raise ValueError(
                "Device with ztp_mac {} not found".format(ztp_mac))
        if dev.state != DeviceState.DHCP_BOOT:
            raise ValueError(
                "Device with ztp_mac {} is in incorrect state: {}".format(
                    ztp_mac, str(dev.state)))
        if str(dev.dhcp_ip) != dhcp_ip:
            dev.dhcp_ip = dhcp_ip
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        logger.info(
            "Could not contact device with ztp_mac {} (attempt {})".format(
                ztp_mac, iteration))
        next_job_id = schedule_discover_device(ztp_mac, dhcp_ip,
                                               iteration + 1, scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.ztp_mac == ztp_mac).one()
            dev.serial = facts['serial_number'][:64]
            dev.vendor = facts['vendor'][:64]
            dev.model = facts['model'][:64]
            dev.os_version = facts['os_version'][:64]
            dev.state = DeviceState.DISCOVERED
            new_hostname = dev.hostname
            # Space added between the two parts; the message used to read
            # "successfully scanned(attempt N)".
            logger.info(f"Device with ztp_mac {ztp_mac} successfully scanned "
                        f"(attempt {iteration}), moving to DISCOVERED state")
    except Exception as e:
        logger.exception(
            "Could not update device with ztp_mac {} with new facts: {}".
            format(ztp_mac, str(e)))
        logger.debug("nrresult for ztp_mac {}: {}".format(ztp_mac, nrresult))
        raise

    # A failure to set the hostname is only logged; the job result still
    # carries the facts getter result.
    nrresult_hostname = nr_filtered.run(task=set_hostname_task,
                                        new_hostname=new_hostname)
    if nrresult_hostname.failed:
        logger.info("Could not set hostname for ztp_mac: {}".format(ztp_mac))

    return NornirJobResult(nrresult=nrresult)
def init_device_step2(device_id: int, iteration: int = -1,
                      job_id: Optional[str] = None,
                      scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Verify a device in state INIT and mark it as managed.

    Fetches NAPALM facts from the device. If the device cannot be reached,
    a retry is requested via schedule_init_device_step2. If the device
    answers with the expected hostname it is set to state MANAGED and
    marked unsynchronized, its facts are stored via set_facts, its dhcp_ip
    is cleared and the new_managed_device plugin hook is called.

    Args:
        device_id: Database ID of the device being initialized
        iteration: Forwarded to schedule_init_device_step2 when the device
            could not be contacted
        job_id: Not used inside this function
        scheduled_by: Forwarded to schedule_init_device_step2 on retry

    Returns:
        NornirJobResult wrapping the facts getter result, with next_job_id
        set if a retry job was scheduled

    Raises:
        DeviceStateException: If the device is not in state INIT
        InitError: If facts could not be read from the result, or the
            device reports a hostname other than the expected one
    """
    logger = get_logger()
    # step4+ in apjob: if success, update management ip and device state, trigger external stuff?
    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        if dev.state != DeviceState.INIT:
            logger.error("Device with ID {} got to init step2 but is in incorrect state: {}".
                         format(device_id, dev.state.name))
            raise DeviceStateException(
                "Device must be in state INIT to continue init step 2")
        hostname = dev.hostname
        devtype: DeviceType = dev.device_type

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        next_job_id = schedule_init_device_step2(device_id, iteration,
                                                 scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        return NornirJobResult(nrresult=nrresult)

    try:
        facts = nrresult[hostname][0].result['facts']
        found_hostname = facts['hostname']
    except Exception as e:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not
        # converted into InitError; the original cause is kept via chaining.
        raise InitError(
            "Could not log in to device during init step 2") from e

    if hostname != found_hostname:
        raise InitError("Newly initialized device presents wrong hostname")

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        dev.state = DeviceState.MANAGED
        dev.synchronized = False
        set_facts(dev, facts)
        management_ip = dev.management_ip
        dev.dhcp_ip = None

    # Plugin hook: new managed device
    # Send: hostname , device type , serial , platform , vendor , model , os version
    # Best effort: a failing hook is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.new_managed_device(hostname=hostname,
                                       device_type=devtype.name,
                                       serial_number=facts['serial_number'],
                                       vendor=facts['vendor'],
                                       model=facts['model'],
                                       os_version=facts['os_version'],
                                       management_ip=str(management_ip))
    except Exception as e:
        # Placeholder added: the message previously had no '{}', so the
        # exception text passed to format() was silently dropped.
        logger.exception(
            "Error while running plugin hooks for new_managed_device: {}".format(
                str(e)))

    return NornirJobResult(nrresult=nrresult)
def init_fabric_device_step1(
        device_id: int,
        new_hostname: str,
        device_type: str,
        neighbors: Optional[List[str]] = [],
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize fabric (CORE/DIST) device for management by CNaaS-NMS.

    Steps performed: validate the device type, verify neighbors using a
    dry-run linknet update, set the device to state INIT, commit the
    linknets, reserve management and infra IPs, push the base management
    config, store the management IP, call the allocated_ipv4 plugin hook,
    then schedule a neighbor resync job and init step 2.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        device_type: String representing DeviceType
        neighbors: Optional list of hostnames of peer devices. The default
            list is only passed on to pre_init_check_neighbors, never
            modified in this function.
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object, with next_job_id set to the scheduled
        init step 2 job

    Raises:
        DeviceStateException
        ValueError: If device_type is not a DeviceType name, or is not
            CORE or DIST
    """
    logger = get_logger()
    if DeviceType.has_name(device_type):
        devtype = DeviceType[device_type]
    else:
        raise ValueError("Invalid 'device_type' provided")

    if devtype not in [DeviceType.CORE, DeviceType.DIST]:
        raise ValueError(
            "Init fabric device requires device type DIST or CORE")

    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # Test update of linknets using LLDP data
        linknets = update_linknets(session, dev.hostname, devtype,
                                   ztp_hostname=new_hostname, dry_run=True)

        # Any exception from the neighbor checks propagates to the caller
        # before the device state is touched.
        verified_neighbors = pre_init_check_neighbors(
            session, dev, devtype, linknets, neighbors)
        logger.debug("Found valid neighbors for INIT of {}: {}".format(
            new_hostname, ", ".join(verified_neighbors)))
        check_neighbor_sync(session, verified_neighbors)

        dev.state = DeviceState.INIT
        dev.device_type = devtype
        session.commit()
        # If neighbor check works, commit new linknets
        # This will also mark neighbors as unsynced
        linknets = update_linknets(session, dev.hostname, devtype,
                                   ztp_hostname=new_hostname, dry_run=False)
        logger.debug("New linknets for INIT of {} created: {}".format(
            new_hostname, linknets))

        # Select and reserve a new management and infra IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()

        mgmt_ip = cnaas_nms.confpush.underlay.find_free_mgmt_lo_ip(session)
        infra_ip = cnaas_nms.confpush.underlay.find_free_infra_ip(session)

        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        dev.infra_ip = infra_ip
        session.commit()

        mgmt_variables = {
            'mgmt_ipif': str(IPv4Interface('{}/32'.format(mgmt_ip))),
            'mgmt_prefixlen': 32,
            'infra_ipif': str(IPv4Interface('{}/32'.format(infra_ip))),
            'infra_ip': str(infra_ip),
        }

        device_variables = populate_device_vars(session, dev, new_hostname,
                                                devtype)
        # Management variables take precedence over populated device vars
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # TODO: certicate

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=devtype,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = mgmt_ip
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP
    # Best effort: a failing hook is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=None,
                                   hostname=hostname)
    except Exception as e:
        # Placeholder added: the message previously had no '{}', so the
        # exception text passed to format() was silently dropped.
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # One scheduler instance is used for both jobs below
    scheduler = Scheduler()

    # step3. resync neighbors
    sync_nei_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        scheduled_by=scheduled_by,
        kwargs={
            'hostnames': verified_neighbors,
            'dry_run': False
        })
    logger.info(f"Scheduled job {sync_nei_job_id} to resynchronize neighbors")

    # step4. register apscheduler job that continues steps
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=60,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def init_access_device_step1(
        device_id: int,
        new_hostname: str,
        mlag_peer_id: Optional[int] = None,
        mlag_peer_new_hostname: Optional[str] = None,
        uplink_hostnames_arg: Optional[List[str]] = [],
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS.

    If a MLAG/MC-LAG pair is to be configured both mlag_peer_id and
    mlag_peer_new_hostname must be set. In that case this function also
    schedules itself for the peer device, passing on the uplink hostnames
    it found.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        mlag_peer_id: Device ID of MLAG peer device (optional)
        mlag_peer_new_hostname: Hostname to configure on peer device (optional)
        uplink_hostnames_arg: List of hostnames of uplink peer devices (optional)
                              Used when initializing MLAG peer device
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object, with next_job_id set to the scheduled
        init step 2 job

    Raises:
        DeviceStateException
        ValueError: If only one of mlag_peer_id and mlag_peer_new_hostname
            is given (and uplink_hostnames_arg is empty)
        Exception: If no management domain or no free management IP
            could be found
    """
    logger = get_logger()
    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # update linknets using LLDP data
        update_linknets(session, dev.hostname, DeviceType.ACCESS)

        # If this is the first device in an MLAG pair
        if mlag_peer_id and mlag_peer_new_hostname:
            mlag_peer_dev = pre_init_checks(session, mlag_peer_id)
            update_linknets(session, mlag_peer_dev.hostname,
                            DeviceType.ACCESS)
            update_interfacedb_worker(
                session, dev, replace=True, delete_all=False,
                mlag_peer_hostname=mlag_peer_dev.hostname)
            update_interfacedb_worker(
                session, mlag_peer_dev, replace=True, delete_all=False,
                mlag_peer_hostname=dev.hostname)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)
            uplink_hostnames += mlag_peer_dev.get_uplink_peer_hostnames(
                session)
            # check that both devices see the correct MLAG peer
            pre_init_check_mlag(session, dev, mlag_peer_dev)
            pre_init_check_mlag(session, mlag_peer_dev, dev)
        # If this is the second device in an MLAG pair
        elif uplink_hostnames_arg:
            uplink_hostnames = uplink_hostnames_arg
        elif mlag_peer_id or mlag_peer_new_hostname:
            raise ValueError(
                "mlag_peer_id and mlag_peer_new_hostname must be specified together"
            )
        # If this device is not part of an MLAG pair
        else:
            update_interfacedb_worker(session, dev, replace=True,
                                      delete_all=False)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)

        # TODO: check compatability, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, uplink_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(uplink_hostnames))

        # Select a new management IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}/{}"
                .format(mgmtdomain.id, mgmtdomain.description))
        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)

        # Populate variables for template rendering
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        mgmt_variables = {
            'mgmt_ipif': str(
                IPv4Interface('{}/{}'.format(
                    mgmt_ip, mgmt_gw_ipif.network.prefixlen))),
            'mgmt_ip': str(mgmt_ip),
            'mgmt_prefixlen': int(mgmt_gw_ipif.network.prefixlen),
            'mgmt_vlan_id': mgmtdomain.vlan,
            'mgmt_gw': mgmt_gw_ipif.ip,
        }
        device_variables = populate_device_vars(session, dev, new_hostname,
                                                DeviceType.ACCESS)
        # Management variables take precedence over populated device vars
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=DeviceType.ACCESS,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ip']
        dev.state = DeviceState.INIT
        dev.device_type = DeviceType.ACCESS
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP
    # Best effort: a failing hook is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=str(mgmt_gw_ipif.network),
                                   hostname=hostname)
    except Exception as e:
        # Placeholder added: the message previously had no '{}', so the
        # exception text passed to format() was silently dropped.
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # step3. register apscheduler job that continues steps
    if mlag_peer_id and mlag_peer_new_hostname:
        # account for delayed start of peer device plus mgmt timeout
        step2_delay = 30 + 60 + 30
    else:
        step2_delay = 30
    scheduler = Scheduler()
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=step2_delay,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    if mlag_peer_id and mlag_peer_new_hostname:
        # The peer is initialized by a delayed run of this same function;
        # it receives the uplink hostnames and so takes the
        # "second device in an MLAG pair" branch above.
        mlag_peer_job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.init_device:init_access_device_step1',
            when=60,
            scheduled_by=scheduled_by,
            kwargs={
                'device_id': mlag_peer_id,
                'new_hostname': mlag_peer_new_hostname,
                'uplink_hostnames_arg': uplink_hostnames,
                'scheduled_by': scheduled_by
            })
        logger.info("MLAG peer (id {}) init scheduled as job # {}".format(
            mlag_peer_id, mlag_peer_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
def sync_devices(hostnames: Optional[List[str]] = None,
                 device_type: Optional[str] = None,
                 group: Optional[str] = None,
                 dry_run: bool = True,
                 force: bool = False,
                 auto_push: bool = False,
                 job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None,
                 resync: bool = False) -> NornirJobResult:
    """Synchronize devices to their respective templates. If no arguments
    are specified then synchronize all devices that are currently out
    of sync.

    Args:
        hostnames: Specify a list of hostnames to synchronize
        device_type: Specify a device type to synchronize
        group: Specify a group of devices to synchronize
        dry_run: Don't commit generated config to device
        force: Commit config even if changes made outside CNaaS will get
               overwritten
        auto_push: Automatically do live-run after dry-run if change score is low
        job_id: job_id provided by scheduler when adding a new job
        scheduled_by: Username from JWT
        resync: Re-synchronize a device even if it's marked as synced in the
                database, a device selected by hostname is always re-synced

    Returns:
        NornirJobResult

    Raises:
        Exception: If the config hash check raised or failed for any device
        JoblockError: If the 'devices' lock could not be acquired for a
            live run
    """
    logger = get_logger()
    nr = cnaas_init()
    dev_count = 0
    skipped_hostnames = []
    if hostnames:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, hostname=hostnames)
    else:
        if device_type:
            nr_filtered, dev_count, skipped_hostnames = \
                inventory_selector(nr, resync=resync, device_type=device_type)
        elif group:
            nr_filtered, dev_count, skipped_hostnames = \
                inventory_selector(nr, resync=resync, group=group)
        else:
            # all devices
            nr_filtered, dev_count, skipped_hostnames = \
                inventory_selector(nr, resync=resync)

    if skipped_hostnames:
        logger.info("Device(s) already synchronized, skipping ({}): {}".format(
            len(skipped_hostnames), ", ".join(skipped_hostnames)
        ))

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for synchronization ({}): {}".format(
        dev_count, ", ".join(device_list)
    ))

    try:
        nrresult = nr_filtered.run(task=sync_check_hash,
                                   force=force,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while checking config hash: {}".format(str(e)))
        raise
    else:
        if nrresult.failed:
            # Mark devices as unsynchronized if config hash check failed
            with sqla_session() as session:
                session.query(Device).filter(Device.hostname.in_(nrresult.failed_hosts.keys())).\
                    update({Device.synchronized: False}, synchronize_session=False)
            raise Exception('Configuration hash check failed for {}'.format(
                ' '.join(nrresult.failed_hosts.keys())))

    if not dry_run:
        with sqla_session() as session:
            logger.info("Trying to acquire lock for devices to run syncto job: {}".format(job_id))
            if not Joblock.acquire_lock(session, name='devices', job_id=job_id):
                raise JoblockError("Unable to acquire lock for configuring devices")

    try:
        nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while synchronizing devices: {}".format(str(e)))
        try:
            if not dry_run:
                with sqla_session() as session:
                    logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
                    Joblock.release_lock(session, job_id=job_id)
        except Exception:
            logger.error("Unable to release devices lock after syncto job")
        # nrresult still holds the result of the hash check run at this point
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())
    for hostname in failed_hosts:
        logger.error("Synchronization of device '{}' failed".format(hostname))

    if nrresult.failed:
        logger.error("Not all devices were successfully synchronized")

    change_scores = []
    changed_hosts = []
    unchanged_hosts = []
    # calculate change impact score
    for host, results in nrresult.items():
        # Exactly three results are expected per host; the diff is read from
        # the third and the change score from the host of the first.
        if len(results) != 3:
            logger.debug("Unable to calculate change score for failed device {}".format(host))
        elif results[2].diff:
            changed_hosts.append(host)
            if "change_score" in results[0].host:
                change_scores.append(results[0].host["change_score"])
                logger.debug("Change score for host {}: {:.1f}".format(
                    host, results[0].host["change_score"]))
        else:
            unchanged_hosts.append(host)
            change_scores.append(0)
            logger.debug("Empty diff for host {}, 0 change score".format(
                host))

    nr_confighash = None
    if dry_run and force:
        # update config hash for devices that had an empty diff because local
        # changes on a device can cause reordering of CLI commands that results
        # in config hash mismatch even if the calculated diff was empty
        def include_filter(host, include_list=unchanged_hosts):
            return host.name in include_list
        nr_confighash = nr_filtered.filter(filter_func=include_filter)
    elif not dry_run:
        # set new config hash for devices that was successfully updated
        def exclude_filter(host, exclude_list=failed_hosts+unchanged_hosts):
            return host.name not in exclude_list
        nr_confighash = nr_filtered.filter(filter_func=exclude_filter)

    if nr_confighash:
        try:
            nrresult_confighash = nr_confighash.run(task=update_config_hash)
        except Exception as e:
            logger.exception("Exception while updating config hashes: {}".format(str(e)))
        else:
            if nrresult_confighash.failed:
                logger.error("Unable to update some config hashes: {}".format(
                    list(nrresult_confighash.failed_hosts.keys())))

    # set devices as synchronized if needed
    with sqla_session() as session:
        for hostname in changed_hosts:
            dev: Device = session.query(Device).filter(Device.hostname == hostname).one()
            # A dry run that found changes leaves the device out of sync;
            # a live run that pushed them brings it in sync.
            dev.synchronized = False if dry_run else True
        for hostname in unchanged_hosts:
            dev: Device = session.query(Device).filter(Device.hostname == hostname).one()
            dev.synchronized = True
        if not dry_run:
            logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
            Joblock.release_lock(session, job_id=job_id)

    if len(device_list) == 0:
        total_change_score = 0
    elif not change_scores or failed_hosts:
        # No score available, or at least one failed device: assume the
        # maximum impact
        total_change_score = 100
    else:
        # use individual max as total_change_score, range 1-100
        total_change_score = max(min(int(max(change_scores) + 0.5), 100), 1)
    logger.info(
        "Change impact score: {:.1f} (dry_run: {}, selected devices: {}, changed devices: {})".
        format(total_change_score, dry_run, len(device_list), len(changed_hosts)))

    next_job_id = None
    # Auto-push only applies to a dry run of exactly one device that was
    # selected by hostname
    if auto_push and len(device_list) == 1 and hostnames and dry_run:
        if not changed_hosts:
            logger.info("None of the selected host has any changes (diff), skipping auto-push")
        elif total_change_score < AUTOPUSH_MAX_SCORE:
            scheduler = Scheduler()
            next_job_id = scheduler.add_onetime_job(
                'cnaas_nms.confpush.sync_devices:sync_devices',
                when=0,
                scheduled_by=scheduled_by,
                kwargs={'hostnames': hostnames, 'dry_run': False, 'force': force})
            logger.info(f"Auto-push scheduled live-run of commit as job id {next_job_id}")
        else:
            logger.info(
                f"Auto-push of config to device {hostnames} failed because change score of "
                f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}"
            )

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id,
                           change_score=total_change_score)
def sync_devices(hostname: Optional[str] = None,
                 device_type: Optional[str] = None,
                 dry_run: bool = True,
                 force: bool = False) -> NornirJobResult:
    """Synchronize devices to their respective templates.

    If neither hostname nor device_type is specified then all managed
    devices that are currently marked as unsynchronized are selected.

    Args:
        hostname: Specify a single host by hostname to synchronize
        device_type: Specify a device type to synchronize
        dry_run: Passed on to the push_sync_device task. Stored config
            hashes and the synchronized flag are only updated when this
            is False.
        force: If False, abort when the running config hash of any selected
            device differs from the stored config hash.

    Returns:
        NornirJobResult

    Raises:
        Exception: If a running config hash could not be fetched, if a
            device config was altered outside of CNaaS and force is False,
            or if the nornir run itself raised before producing a result.
    """
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    if hostname:
        nr_filtered = nr.filter(name=hostname).filter(managed=True)
    elif device_type:
        # NOTE(review): unlike the other branches this one does not filter
        # on managed=True -- confirm that this is intended.
        nr_filtered = nr.filter(F(groups__contains='T_' + device_type))
    else:
        # all unsynchronized devices
        nr_filtered = nr.filter(synchronized=False).filter(managed=True)

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info(
        "Device(s) selected for synchronization: {}".format(device_list))

    # Compare stored config hash with the hash of the running config to
    # detect devices that were changed outside of CNaaS.
    alterned_devices = []
    for device in device_list:
        stored_config_hash = Device.get_config_hash(device)
        if stored_config_hash is None:
            # No hash stored for this device, nothing to compare against
            continue
        current_config_hash = get_running_config_hash(device)
        if current_config_hash is None:
            raise Exception('Failed to get configuration hash')
        if stored_config_hash != current_config_hash:
            logger.info(
                "Device {} configuration is altered outside of CNaaS!".format(
                    device))
            alterned_devices.append(device)
    if alterned_devices and force is False:
        raise Exception(
            'Configuration for {} is altered outside of CNaaS'.format(
                ', '.join(alterned_devices)))

    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run)
        print_result(nrresult)
    except Exception as e:
        logger.exception("Exception while synchronizing devices: {}".format(
            str(e)))
        if nrresult is None:
            # The run itself failed so there is no result to wrap; re-raise
            # the real error instead of failing on an unbound variable.
            raise
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = set(nrresult.failed_hosts.keys())

    if not dry_run:
        # Store the new running config hash for every device that was
        # successfully updated.
        for key in nrresult.keys():
            if key in failed_hosts:
                continue
            new_config_hash = get_running_config_hash(key)
            if new_config_hash is None:
                raise Exception('Failed to get configuration hash')
            Device.set_config_hash(key, new_config_hash)

        # Only a live run changes device state, so the synchronized flag is
        # only set here.
        with sqla_session() as session:
            for synced_hostname in device_list:
                if synced_hostname in failed_hosts:
                    logger.error(
                        "Synchronization of device '{}' failed".format(
                            synced_hostname))
                    continue
                dev: Device = session.query(Device).filter(
                    Device.hostname == synced_hostname).one()
                dev.synchronized = True

    if nrresult.failed:
        logger.error("Not all devices were successfully synchronized")

    return NornirJobResult(nrresult=nrresult)
def init_access_device_step1(device_id: int,
                             new_hostname: str) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS

    Checks device state and connectivity, detects uplinks to DIST
    neighbors, allocates a management IP, pushes the base management
    config and schedules init_access_device_step2.

    Args:
        device_id (int): Database ID of the device to initialize
        new_hostname (str): Hostname to assign to the device

    Returns:
        Nornir result object

    Raises:
        DeviceStateException: Device is not in state DISCOVERED
        ConnectionCheckError: Could not connect to the device
        RuntimeError: Pushing the management config did not fail as expected
        Exception: No management domain or free management IP was found
    """
    # Check that we can find device and that it's in the correct state to
    # start init
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        if dev.state != DeviceState.DISCOVERED:
            raise DeviceStateException(
                "Device must be in state DISCOVERED to begin init")
        old_hostname = dev.hostname

    # Perform connectivity check
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_old_filtered = nr.filter(name=old_hostname)
    try:
        nrresult_old = nr_old_filtered.run(task=networking.napalm_get,
                                           getters=["facts"])
    except Exception as e:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}: {str(e)}") from e
    if nrresult_old.failed:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}")

    cnaas_nms.confpush.get.update_linknets(old_hostname)
    uplinks = []
    neighbor_hostnames = []
    with sqla_session() as session:
        dev = session.query(Device).filter(
            Device.hostname == old_hostname).one()
        # Collect local interfaces that link to DIST neighbors as uplinks
        for neighbor_d in dev.get_neighbors(session):
            if neighbor_d.device_type != DeviceType.DIST:
                continue
            local_if = dev.get_link_to_local_ifname(session, neighbor_d)
            if local_if:
                uplinks.append({'ifname': local_if})
                neighbor_hostnames.append(neighbor_d.hostname)
        logger.debug(
            "Uplinks for device {} detected: {} neighbor_hostnames: {}".format(
                device_id, uplinks, neighbor_hostnames))
        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, neighbor_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(neighbor_hostnames))
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}".
                format(mgmtdomain.id))
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        device_variables = {
            # Management IP with the prefix length of the gateway network
            'mgmt_ipif': IPv4Interface('{}/{}'.format(
                mgmt_ip, mgmt_gw_ipif.network.prefixlen)),
            'uplinks': uplinks,
            'mgmt_vlan_id': mgmtdomain.vlan,
            'mgmt_gw': mgmt_gw_ipif.ip
        }
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.state = DeviceState.INIT
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    # Re-initialize nornir so the inventory reflects the new hostname
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_base_management,
                                   device_variables=device_variables)
    except Exception as e:
        # Ignore exception, we expect to lose connectivity.
        # Sometimes we get no exception here, but it's saved in result;
        # other times we get socket.timeout, pyeapi.eapilib.ConnectionError
        # or napalm.base.exceptions.ConnectionException.
        logger.debug(
            "Ignoring exception while pushing management config to "
            "device_id {}: {}".format(device_id, str(e)))

    if nrresult is not None:
        if not nrresult.failed:
            # we don't expect success here
            raise RuntimeError(
                "Push of management config to device_id {} did not fail, "
                "expected to lose connectivity".format(device_id))
        print_result(nrresult)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ipif'].ip

    # step3. register apscheduler job that continues steps
    scheduler = Scheduler()
    next_job = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_access_device_step2',
        when=0,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.debug(f"Step 2 scheduled as ID {next_job.id}")

    # NOTE(review): nrresult is None when the push raised instead of
    # returning a failed result -- confirm NornirJobResult accepts None.
    return NornirJobResult(nrresult=nrresult, next_job_id=next_job.id)