Example #1
0
def device_upgrade(download: Optional[bool] = False,
                   activate: Optional[bool] = False,
                   filename: Optional[str] = None,
                   group: Optional[str] = None,
                   hostname: Optional[str] = None,
                   url: Optional[str] = None,
                   job_id: Optional[str] = None,
                   pre_flight: Optional[bool] = False,
                   reboot: Optional[bool] = False,
                   scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Run the firmware upgrade task on a single device or a device group.

    Args:
        download: Forwarded to device_upgrade_task
        activate: Forwarded to device_upgrade_task
        filename: Forwarded to device_upgrade_task
        group: Group used to select devices when hostname is not set
        hostname: Hostname used to select a device, takes precedence
                  over group
        url: Forwarded to device_upgrade_task
        job_id: Forwarded to device_upgrade_task
        pre_flight: Forwarded to device_upgrade_task
        reboot: Forwarded to device_upgrade_task
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the Nornir result. If the Nornir run itself
        raised, the wrapped result is None.

    Raises:
        ValueError: Neither hostname nor group was specified
        Exception: A selected device is missing from the database or has a
                   platform other than 'eos'
    """
    nr = cnaas_init()
    if hostname:
        nr_filtered, dev_count, _ = inventory_selector(nr, hostname=hostname)
    elif group:
        nr_filtered, dev_count, _ = inventory_selector(nr, group=group)
    else:
        raise ValueError(
            "Neither hostname nor group specified for device_upgrade")

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for firmware upgrade ({}): {}".format(
        dev_count, ", ".join(device_list)))

    # Refuse the whole job if any selected device is not an 'eos' device
    for device in device_list:
        with sqla_session() as session:
            dev: Device = session.query(Device).\
                filter(Device.hostname == device).one_or_none()
            if not dev:
                raise Exception('Could not find device: {}'.format(device))
            if dev.platform != 'eos':
                raise Exception(
                    'Invalid device platform "{}" for device: {}'.format(
                        dev.platform, device))

    # Bound before the try block so the except branch can always return it,
    # even when nr_filtered.run() raises before assigning a result.
    nrresult = None

    # Start tasks to take care of the upgrade
    try:
        nrresult = nr_filtered.run(task=device_upgrade_task,
                                   job_id=job_id,
                                   download=download,
                                   filename=filename,
                                   url=url,
                                   pre_flight=pre_flight,
                                   reboot=reboot,
                                   activate=activate)
        print_result(nrresult)
    except Exception as e:
        logger.exception('Exception while upgrading devices: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    # Separate loop variable so the hostname parameter is not shadowed
    for failed_host in list(nrresult.failed_hosts.keys()):
        logger.error(
            "Firmware upgrade of device '{}' failed".format(failed_host))

    if nrresult.failed:
        logger.error("Not all devices were successfully upgraded")

    return NornirJobResult(nrresult=nrresult)
Example #2
0
def device_erase(device_id: int = None, job_id: int = None) -> NornirJobResult:
    """Factory default an access device and remove it from the database.

    Args:
        device_id: Database ID of the device to erase
        job_id: Not used inside this function

    Returns:
        NornirJobResult wrapping the Nornir result. If the Nornir run itself
        raised, the wrapped result is None.

    Raises:
        Exception: The device does not exist or is not of type ACCESS
    """
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(Device.id ==
                                                   device_id).one_or_none()
        if dev:
            hostname = dev.hostname
            device_type = dev.device_type
        else:
            raise Exception('Could not find a device with ID {}'.format(
                device_id))

    if device_type != DeviceType.ACCESS:
        raise Exception('Can only do factory default on access')

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname).filter(managed=True)

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device selected: {}".format(
        device_list
    ))

    # Bound before the try block so the except branch can always return it,
    # even when nr_filtered.run() raises before assigning a result.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=device_erase_task,
                                   hostname=hostname)
        print_result(nrresult)
    except Exception as e:
        logger.exception('Exception while erasing device: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())
    for failed_host in failed_hosts:
        logger.error("Failed to factory default device '{}' failed".format(
            failed_host))

    if nrresult.failed:
        logger.error("Factory default failed")

    # NOTE(review): if the inventory filter above matched no hosts,
    # failed_hosts is presumably empty as well and the device is still
    # removed from the database - confirm this is intended.
    if not failed_hosts:
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(Device.id ==
                                                       device_id).one_or_none()
            # The row may have disappeared since the first lookup
            if dev is not None:
                session.delete(dev)
                session.commit()

    return NornirJobResult(nrresult=nrresult)
Example #3
0
def sync_basetemplate(hostname: Optional[str]=None,
                      device_type: Optional[DeviceType]=None,
                      dry_run: bool=True) -> NornirJobResult:
    """Run the push_basetemplate task on one device or one device type group.

    Args:
        hostname: Hostname of the single device to target; checked first
        device_type: DeviceType whose 'T_<value>' group is targeted when no
                     usable hostname is given
        dry_run: Accepted but not forwarded to the task by this function

    Returns:
        NornirJobResult wrapping the Nornir result

    Raises:
        ValueError: Neither a string hostname nor a DeviceType was given
    """
    inventory = cnaas_nms.confpush.nornir_helper.cnaas_init()

    if isinstance(hostname, str) and hostname:
        targets = inventory.filter(name=hostname)
    elif isinstance(device_type, DeviceType) and device_type:
        # Device type groups are named with a 'T_' prefix
        type_group = 'T_' + device_type.value
        targets = inventory.filter(F(groups__contains=type_group))
    else:
        raise ValueError("hostname or device_type must be specified")

    task_result = targets.run(task=push_basetemplate)
    return NornirJobResult(nrresult=task_result)
Example #4
0
def renew_cert(hostname: Optional[str] = None,
               group: Optional[str] = None,
               job_id: Optional[str] = None,
               scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Run the certificate renewal task on a single device or a device group.

    Args:
        hostname: Hostname used to select a device, takes precedence
                  over group
        group: Group used to select devices when hostname is not set
        job_id: Forwarded to renew_cert_task
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the Nornir result. If the Nornir run itself
        raised, the wrapped result is None.

    Raises:
        ValueError: Neither hostname nor group was specified
        Exception: A selected device is missing from the database or has an
                   unsupported platform
    """
    logger = get_logger()
    nr = cnaas_init()
    if hostname:
        nr_filtered, dev_count, _ = inventory_selector(nr, hostname=hostname)
    elif group:
        nr_filtered, dev_count, _ = inventory_selector(nr, group=group)
    else:
        raise ValueError("Neither hostname nor group specified for renew_cert")

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for renew certificate ({}): {}".format(
        dev_count, ", ".join(device_list)))

    supported_platforms = ['eos']
    # Make sure we only attempt supported devices
    for device in device_list:
        with sqla_session() as session:
            dev: Device = session.query(Device). \
                filter(Device.hostname == device).one_or_none()
            if not dev:
                raise Exception('Could not find device: {}'.format(device))
            if dev.platform not in supported_platforms:
                raise Exception(
                    'Unsupported device platform "{}" for device: {}'.format(
                        dev.platform, device))

    # Bound before the try block so the except branch can always return it,
    # even when nr_filtered.run() raises before assigning a result.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=renew_cert_task, job_id=job_id)
    except Exception as e:
        logger.exception('Exception while renewing certificates: {}'.format(
            str(e)))
        return NornirJobResult(nrresult=nrresult)

    # Separate loop variable so the hostname parameter is not shadowed
    for failed_host in list(nrresult.failed_hosts.keys()):
        logger.error(
            "Certificate renew on device '{}' failed".format(failed_host))

    if nrresult.failed:
        logger.error("Not all devices got new certificates")

    return NornirJobResult(nrresult=nrresult)
Example #5
0
def update_facts(hostname: str,
                 job_id: Optional[str] = None,
                 scheduled_by: Optional[str] = None):
    """Fetch NAPALM facts from a device and store them on its database row.

    Args:
        hostname: Hostname of the device to update
        job_id: Not used inside this function
        scheduled_by: Not used inside this function

    Returns:
        NornirJobResult wrapping the result of the facts getter; also
        returned, without touching the database, when the getter failed

    Raises:
        ValueError: The device does not exist or is neither MANAGED nor
                    UNMANAGED
        Exception: Any error raised while reading the facts or writing them
                   to the database is logged and re-raised
    """
    logger = get_logger()
    accepted_states = (DeviceState.MANAGED, DeviceState.UNMANAGED)

    with sqla_session() as session:
        device: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not device:
            raise ValueError(
                "Device with hostname {} not found".format(hostname))
        if device.state not in accepted_states:
            raise ValueError(
                "Device with hostname {} is in incorrect state: {}".format(
                    hostname, str(device.state)))
        hostname = device.hostname

    inventory = cnaas_nms.confpush.nornir_helper.cnaas_init()
    target = inventory.filter(name=hostname)
    nrresult = target.run(task=networking.napalm_get, getters=["facts"])

    if nrresult.failed:
        logger.error(
            "Could not contact device with hostname {}".format(hostname))
        return NornirJobResult(nrresult=nrresult)

    # Device column -> key in the facts dictionary, applied in this order
    column_to_fact = (
        ('serial', 'serial_number'),
        ('vendor', 'vendor'),
        ('model', 'model'),
        ('os_version', 'os_version'),
    )
    try:
        facts = nrresult[hostname][0].result['facts']
        with sqla_session() as session:
            device: Device = session.query(Device).filter(
                Device.hostname == hostname).one()
            for column, fact_key in column_to_fact:
                setattr(device, column, facts[fact_key])
        logger.debug("Updating facts for device {}: {}, {}, {}, {}".format(
            hostname, facts['serial_number'], facts['vendor'], facts['model'],
            facts['os_version']))
    except Exception as e:
        logger.exception(
            "Could not update device with hostname {} with new facts: {}".
            format(hostname, str(e)))
        logger.debug("Get facts nrresult for hostname {}: {}".format(
            hostname, nrresult))
        raise e

    return NornirJobResult(nrresult=nrresult)
Example #6
0
def init_access_device_step2(device_id: int,
                             iteration: int = -1) -> NornirJobResult:
    """Verify a newly initialized access device and mark it as MANAGED.

    Fetches facts from the device. If the device cannot be reached the step
    is handed to schedule_init_access_device_step2, otherwise the hostname
    reported by the device is compared with the database and the device is
    moved to state MANAGED with type ACCESS.

    Args:
        device_id: Database ID of the device being initialized
        iteration: Passed on to schedule_init_access_device_step2 when the
                   device could not be contacted

    Returns:
        NornirJobResult wrapping the facts result, including next_job_id
        when a retry job was scheduled

    Raises:
        DeviceStateException: The device is not in state INIT
        InitError: Facts could not be read or the device reports a
                   different hostname than expected
    """
    # step4+ in apjob: if success, update management ip and device state, trigger external stuff?
    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        if dev.state != DeviceState.INIT:
            logger.error("Device with ID {} got to init step2 but is in incorrect state: {}".\
                         format(device_id, dev.state.name))
            raise DeviceStateException(
                "Device must be in state INIT to continue init step 2")
        hostname = dev.hostname
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=networking.napalm_get, getters=["facts"])

    if nrresult.failed:
        next_job_id = schedule_init_access_device_step2(device_id, iteration)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        else:
            return NornirJobResult(nrresult=nrresult)
    try:
        facts = nrresult[hostname][0].result['facts']
        found_hostname = facts['hostname']
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate; keep the original error as the cause.
        raise InitError(
            "Could not log in to device during init step 2") from e
    if hostname != found_hostname:
        raise InitError("Newly initialized device presents wrong hostname")

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        dev.state = DeviceState.MANAGED
        dev.device_type = DeviceType.ACCESS
        dev.synchronized = False
        #TODO: remove dhcp_ip ?

    # Best effort: a failure here is logged but does not fail the init job
    try:
        update_interfacedb(hostname, replace=True)
    except Exception as e:
        logger.exception(
            "Exception while updating interface database for device {}: {}".\
            format(hostname, str(e)))

    return NornirJobResult(nrresult=nrresult)
Example #7
0
def apply_config(hostname: str,
                 config: str,
                 dry_run: bool,
                 job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Apply a static configuration (from backup etc) to a device.

    When dry_run is false the device is marked UNMANAGED and unsynchronized
    in the database before the configuration is pushed.

    Args:
        hostname: Specify a single host by hostname to synchronize
        config: Static configuration to apply
        dry_run: Set to false to actually apply config to device
        job_id: Job ID number
        scheduled_by: Username from JWT

    Returns:
        NornirJobResult wrapping the Nornir result. If the Nornir run itself
        raised, the exception is logged and the wrapped result is None.

    Raises:
        Exception: The device does not exist or is neither MANAGED nor
                   UNMANAGED
    """
    logger = get_logger()

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.hostname == hostname).one_or_none()
        if not dev:
            raise Exception("Device {} not found".format(hostname))
        elif not (dev.state == DeviceState.MANAGED
                  or dev.state == DeviceState.UNMANAGED):
            raise Exception("Device {} is in invalid state: {}".format(
                hostname, dev.state))
        if not dry_run:
            dev.state = DeviceState.UNMANAGED
            dev.synchronized = False

    nr = cnaas_init()
    nr_filtered, _, _ = inventory_selector(nr, hostname=hostname)

    # Bound before the try block: the except branch only logs, so the return
    # below would otherwise hit an unbound name when run() raises.
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_static_config,
                                   config=config,
                                   dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception in apply_config: {}".format(e))

    return NornirJobResult(nrresult=nrresult)
Example #8
0
def discover_device(ztp_mac: str,
                    dhcp_ip: str,
                    iteration: int,
                    job_id: Optional[str] = None,
                    scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Scan a device in DHCP_BOOT state and move it to DISCOVERED.

    Fetches facts from the device. If it cannot be reached a new attempt is
    handed to schedule_discover_device, otherwise the facts are stored on
    the device row, its state is set to DISCOVERED and set_hostname_task is
    run against the device.

    Args:
        ztp_mac: ZTP MAC address identifying the device in the database
        dhcp_ip: DHCP IP of the device; stored on the device row when it
                 differs from the stored value
        iteration: Number of the current attempt, used in log messages and
                   incremented when a retry is scheduled
        job_id: Not used inside this function
        scheduled_by: Passed on to schedule_discover_device

    Returns:
        NornirJobResult wrapping the facts result, including next_job_id
        when a retry job was scheduled

    Raises:
        ValueError: The device does not exist or is not in state DHCP_BOOT
        Exception: Any error raised while storing the facts is logged and
                   re-raised
    """
    logger = get_logger()
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.ztp_mac == ztp_mac).one_or_none()
        if not dev:
            raise ValueError(
                "Device with ztp_mac {} not found".format(ztp_mac))
        if dev.state != DeviceState.DHCP_BOOT:
            raise ValueError(
                "Device with ztp_mac {} is in incorrect state: {}".format(
                    ztp_mac, str(dev.state)))
        if str(dev.dhcp_ip) != dhcp_ip:
            dev.dhcp_ip = dhcp_ip
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        logger.info(
            "Could not contact device with ztp_mac {} (attempt {})".format(
                ztp_mac, iteration))
        next_job_id = schedule_discover_device(ztp_mac, dhcp_ip, iteration + 1,
                                               scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        else:
            return NornirJobResult(nrresult=nrresult)
    try:
        facts = nrresult[hostname][0].result['facts']
        with sqla_session() as session:
            dev: Device = session.query(Device).filter(
                Device.ztp_mac == ztp_mac).one()
            # Fact strings are cut to 64 characters before being stored
            dev.serial = facts['serial_number'][:64]
            dev.vendor = facts['vendor'][:64]
            dev.model = facts['model'][:64]
            dev.os_version = facts['os_version'][:64]
            dev.state = DeviceState.DISCOVERED
            new_hostname = dev.hostname
            # Trailing space added: the two parts used to run together as
            # "scanned(attempt N)"
            logger.info(f"Device with ztp_mac {ztp_mac} successfully scanned "
                        f"(attempt {iteration}), moving to DISCOVERED state")
    except Exception as e:
        logger.exception(
            "Could not update device with ztp_mac {} with new facts: {}".
            format(ztp_mac, str(e)))
        logger.debug("nrresult for ztp_mac {}: {}".format(ztp_mac, nrresult))
        raise

    # A failure to set the hostname is only logged, the job result still
    # reflects the facts scan
    nrresult_hostname = nr_filtered.run(task=set_hostname_task,
                                        new_hostname=new_hostname)
    if nrresult_hostname.failed:
        logger.info("Could not set hostname for ztp_mac: {}".format(ztp_mac))

    return NornirJobResult(nrresult=nrresult)
Example #9
0
def init_device_step2(device_id: int, iteration: int = -1,
                      job_id: Optional[str] = None,
                      scheduled_by: Optional[str] = None) -> \
                      NornirJobResult:
    """Verify a newly initialized device and mark it as MANAGED.

    Fetches facts from the device. If the device cannot be reached the step
    is handed to schedule_init_device_step2, otherwise the hostname reported
    by the device is compared with the database, the device is moved to
    state MANAGED and the new_managed_device plugin hook is called.

    Args:
        device_id: Database ID of the device being initialized
        iteration: Passed on to schedule_init_device_step2 when the device
                   could not be contacted
        job_id: Not used inside this function
        scheduled_by: Passed on to schedule_init_device_step2

    Returns:
        NornirJobResult wrapping the facts result, including next_job_id
        when a retry job was scheduled

    Raises:
        DeviceStateException: The device is not in state INIT
        InitError: Facts could not be read or the device reports a
                   different hostname than expected
    """
    logger = get_logger()
    # step4+ in apjob: if success, update management ip and device state, trigger external stuff?
    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        if dev.state != DeviceState.INIT:
            logger.error("Device with ID {} got to init step2 but is in incorrect state: {}".\
                         format(device_id, dev.state.name))
            raise DeviceStateException(
                "Device must be in state INIT to continue init step 2")
        hostname = dev.hostname
        devtype: DeviceType = dev.device_type
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    nrresult = nr_filtered.run(task=napalm_get, getters=["facts"])

    if nrresult.failed:
        next_job_id = schedule_init_device_step2(device_id, iteration,
                                                 scheduled_by)
        if next_job_id:
            return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
        else:
            return NornirJobResult(nrresult=nrresult)
    try:
        facts = nrresult[hostname][0].result['facts']
        found_hostname = facts['hostname']
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate; keep the original error as the cause.
        raise InitError(
            "Could not log in to device during init step 2") from e
    if hostname != found_hostname:
        raise InitError("Newly initialized device presents wrong hostname")

    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        dev.state = DeviceState.MANAGED
        dev.synchronized = False
        set_facts(dev, facts)
        management_ip = dev.management_ip
        dev.dhcp_ip = None

    # Plugin hook: new managed device
    # Send: hostname , device type , serial , platform , vendor , model , os version
    # Best effort: a failing plugin is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.new_managed_device(hostname=hostname,
                                       device_type=devtype.name,
                                       serial_number=facts['serial_number'],
                                       vendor=facts['vendor'],
                                       model=facts['model'],
                                       os_version=facts['os_version'],
                                       management_ip=str(management_ip))
    except Exception as e:
        # Placeholder added so the exception text actually reaches the log
        logger.exception(
            "Error while running plugin hooks for new_managed_device: {}".format(
                str(e)))

    return NornirJobResult(nrresult=nrresult)
Example #10
0
def init_fabric_device_step1(
        device_id: int,
        new_hostname: str,
        device_type: str,
        neighbors: Optional[List[str]] = None,
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize fabric (CORE/DIST) device for management by CNaaS-NMS.

    Verifies neighbors, reserves management and infra IPs, pushes the base
    management config and schedules a neighbor resync plus init step 2.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        device_type: String representing DeviceType
        neighbors: Optional list of hostnames of peer devices, treated as an
                   empty list when omitted
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError
    """
    logger = get_logger()
    # Fresh list per call instead of a shared mutable default argument
    if neighbors is None:
        neighbors = []

    if DeviceType.has_name(device_type):
        devtype = DeviceType[device_type]
    else:
        raise ValueError("Invalid 'device_type' provided")

    if devtype not in [DeviceType.CORE, DeviceType.DIST]:
        raise ValueError(
            "Init fabric device requires device type DIST or CORE")

    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # Test update of linknets using LLDP data
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=True)

        # Any exception from the neighbor checks propagates and aborts the
        # init before device state or linknets are changed
        verified_neighbors = pre_init_check_neighbors(
            session, dev, devtype, linknets, neighbors)
        logger.debug("Found valid neighbors for INIT of {}: {}".format(
            new_hostname, ", ".join(verified_neighbors)))
        check_neighbor_sync(session, verified_neighbors)

        dev.state = DeviceState.INIT
        dev.device_type = devtype
        session.commit()
        # If neighbor check works, commit new linknets
        # This will also mark neighbors as unsynced
        linknets = update_linknets(session,
                                   dev.hostname,
                                   devtype,
                                   ztp_hostname=new_hostname,
                                   dry_run=False)
        logger.debug("New linknets for INIT of {} created: {}".format(
            new_hostname, linknets))

        # Select and reserve a new management and infra IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()

        mgmt_ip = cnaas_nms.confpush.underlay.find_free_mgmt_lo_ip(session)
        infra_ip = cnaas_nms.confpush.underlay.find_free_infra_ip(session)

        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        dev.infra_ip = infra_ip
        session.commit()

        mgmt_variables = {
            'mgmt_ipif': str(IPv4Interface('{}/32'.format(mgmt_ip))),
            'mgmt_prefixlen': 32,
            'infra_ipif': str(IPv4Interface('{}/32'.format(infra_ip))),
            'infra_ip': str(infra_ip),
        }

        device_variables = populate_device_vars(session, dev, new_hostname,
                                                devtype)
        # Management variables win over same-named device variables
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # TODO: certificate

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=devtype,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = mgmt_ip
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP
    # Best effort: a failing plugin is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=None,
                                   hostname=hostname)
    except Exception as e:
        # Placeholder added so the exception text actually reaches the log
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    scheduler = Scheduler()

    # step3. resync neighbors
    sync_nei_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.sync_devices:sync_devices',
        when=1,
        scheduled_by=scheduled_by,
        kwargs={
            'hostnames': verified_neighbors,
            'dry_run': False
        })
    logger.info(f"Scheduled job {sync_nei_job_id} to resynchronize neighbors")

    # step4. register apscheduler job that continues steps
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=60,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
Example #11
0
def init_access_device_step1(
        device_id: int,
        new_hostname: str,
        mlag_peer_id: Optional[int] = None,
        mlag_peer_new_hostname: Optional[str] = None,
        uplink_hostnames_arg: Optional[List[str]] = None,
        job_id: Optional[str] = None,
        scheduled_by: Optional[str] = None) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS.
    If a MLAG/MC-LAG pair is to be configured both mlag_peer_id and
    mlag_peer_new_hostname must be set.

    Args:
        device_id: Device to select for initialization
        new_hostname: Hostname to configure on this device
        mlag_peer_id: Device ID of MLAG peer device (optional)
        mlag_peer_new_hostname: Hostname to configure on peer device (optional)
        uplink_hostnames_arg: List of hostnames of uplink peer devices (optional)
                              Used when initializing MLAG peer device
        job_id: job_id provided by scheduler when adding job
        scheduled_by: Username from JWT.

    Returns:
        Nornir result object

    Raises:
        DeviceStateException
        ValueError
        Exception: No management domain or no free management IP was found
    """
    logger = get_logger()
    with sqla_session() as session:
        dev = pre_init_checks(session, device_id)

        # update linknets using LLDP data
        update_linknets(session, dev.hostname, DeviceType.ACCESS)

        # If this is the first device in an MLAG pair
        if mlag_peer_id and mlag_peer_new_hostname:
            mlag_peer_dev = pre_init_checks(session, mlag_peer_id)
            update_linknets(session, mlag_peer_dev.hostname, DeviceType.ACCESS)
            update_interfacedb_worker(
                session,
                dev,
                replace=True,
                delete_all=False,
                mlag_peer_hostname=mlag_peer_dev.hostname)
            update_interfacedb_worker(session,
                                      mlag_peer_dev,
                                      replace=True,
                                      delete_all=False,
                                      mlag_peer_hostname=dev.hostname)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)
            uplink_hostnames += mlag_peer_dev.get_uplink_peer_hostnames(
                session)
            # check that both devices see the correct MLAG peer
            pre_init_check_mlag(session, dev, mlag_peer_dev)
            pre_init_check_mlag(session, mlag_peer_dev, dev)
        # If this is the second device in an MLAG pair
        # (None and an empty list both fall through to the branches below)
        elif uplink_hostnames_arg:
            uplink_hostnames = uplink_hostnames_arg
        elif mlag_peer_id or mlag_peer_new_hostname:
            raise ValueError(
                "mlag_peer_id and mlag_peer_new_hostname must be specified together"
            )
        # If this device is not part of an MLAG pair
        else:
            update_interfacedb_worker(session,
                                      dev,
                                      replace=True,
                                      delete_all=False)
            uplink_hostnames = dev.get_uplink_peer_hostnames(session)

        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, uplink_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(uplink_hostnames))
        # Select a new management IP for the device
        ReservedIP.clean_reservations(session, device=dev)
        session.commit()
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}/{}"
                .format(mgmtdomain.id, mgmtdomain.description))
        reserved_ip = ReservedIP(device=dev, ip=mgmt_ip)
        session.add(reserved_ip)
        # Populate variables for template rendering
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        mgmt_variables = {
            'mgmt_ipif':
            str(
                IPv4Interface('{}/{}'.format(mgmt_ip,
                                             mgmt_gw_ipif.network.prefixlen))),
            'mgmt_ip':
            str(mgmt_ip),
            'mgmt_prefixlen':
            int(mgmt_gw_ipif.network.prefixlen),
            'mgmt_vlan_id':
            mgmtdomain.vlan,
            'mgmt_gw':
            mgmt_gw_ipif.ip,
        }
        device_variables = populate_device_vars(session, dev, new_hostname,
                                                DeviceType.ACCESS)
        # Management variables win over same-named device variables
        device_variables = {**device_variables, **mgmt_variables}
        # Update device state
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = nr_filtered.run(task=push_base_management,
                               device_variables=device_variables,
                               devtype=DeviceType.ACCESS,
                               job_id=job_id)

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ip']
        dev.state = DeviceState.INIT
        dev.device_type = DeviceType.ACCESS
        # Remove the reserved IP since it's now saved in the device database instead
        reserved_ip = session.query(ReservedIP).filter(
            ReservedIP.device == dev).one_or_none()
        if reserved_ip:
            session.delete(reserved_ip)

    # Plugin hook, allocated IP
    # Best effort: a failing plugin is logged but does not fail the init job
    try:
        pmh = PluginManagerHandler()
        pmh.pm.hook.allocated_ipv4(vrf='mgmt',
                                   ipv4_address=str(mgmt_ip),
                                   ipv4_network=str(mgmt_gw_ipif.network),
                                   hostname=hostname)
    except Exception as e:
        # Placeholder added so the exception text actually reaches the log
        logger.exception(
            "Error while running plugin hooks for allocated_ipv4: {}".format(
                str(e)))

    # step3. register apscheduler job that continues steps
    if mlag_peer_id and mlag_peer_new_hostname:
        step2_delay = 30 + 60 + 30  # account for delayed start of peer device plus mgmt timeout
    else:
        step2_delay = 30
    scheduler = Scheduler()
    next_job_id = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_device_step2',
        when=step2_delay,
        scheduled_by=scheduled_by,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.info("Init step 2 for {} scheduled as job # {}".format(
        new_hostname, next_job_id))

    # The MLAG peer is initialized by a separate run of this function that
    # receives the uplink hostnames collected above
    if mlag_peer_id and mlag_peer_new_hostname:
        mlag_peer_job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.init_device:init_access_device_step1',
            when=60,
            scheduled_by=scheduled_by,
            kwargs={
                'device_id': mlag_peer_id,
                'new_hostname': mlag_peer_new_hostname,
                'uplink_hostnames_arg': uplink_hostnames,
                'scheduled_by': scheduled_by
            })
        logger.info("MLAG peer (id {}) init scheduled as job # {}".format(
            mlag_peer_id, mlag_peer_job_id))

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id)
Example #12
0
def sync_devices(hostnames: Optional[List[str]] = None, device_type: Optional[str] = None,
                 group: Optional[str] = None, dry_run: bool = True, force: bool = False,
                 auto_push: bool = False, job_id: Optional[int] = None,
                 scheduled_by: Optional[str] = None, resync: bool = False) -> NornirJobResult:
    """Synchronize devices to their respective templates. If no arguments
    are specified then synchronize all devices that are currently out
    of sync.

    Args:
        hostnames: Specify a list of hostnames to synchronize
        device_type: Specify a device type to synchronize
        group: Specify a group of devices to synchronize
        dry_run: Don't commit generated config to device
        force: Commit config even if changes made outside CNaaS will get
               overwritten
        auto_push: Automatically do live-run after dry-run if change score is low
        job_id: job_id provided by scheduler when adding a new job
        scheduled_by: Username from JWT
        resync: Re-synchronize a device even if it's marked as synced in the
                database, a device selected by hostname is always re-synced

    Returns:
        NornirJobResult

    Raises:
        JoblockError: If this is a live run and the "devices" lock could not
            be acquired
        Exception: If the configuration hash check failed for any device
    """
    logger = get_logger()
    nr = cnaas_init()
    dev_count = 0
    skipped_hostnames = []
    # Selection precedence: explicit hostnames, then device type, then group,
    # and finally every device picked by the default selector.
    if hostnames:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, hostname=hostnames)
    elif device_type:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync, device_type=device_type)
    elif group:
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync, group=group)
    else:
        # all devices
        nr_filtered, dev_count, skipped_hostnames = \
            inventory_selector(nr, resync=resync)

    if skipped_hostnames:
        logger.info("Device(s) already synchronized, skipping ({}): {}".format(
            len(skipped_hostnames), ", ".join(skipped_hostnames)
        ))

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info("Device(s) selected for synchronization ({}): {}".format(
        dev_count, ", ".join(device_list)
    ))

    try:
        nrresult = nr_filtered.run(task=sync_check_hash,
                                   force=force,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while checking config hash: {}".format(str(e)))
        raise
    else:
        if nrresult.failed:
            # Mark devices as unsynchronized if config hash check failed
            with sqla_session() as session:
                session.query(Device).filter(Device.hostname.in_(nrresult.failed_hosts.keys())).\
                    update({Device.synchronized: False}, synchronize_session=False)
            raise Exception('Configuration hash check failed for {}'.format(
                ' '.join(nrresult.failed_hosts.keys())))

    # Only a live run needs the lock; a dry run does not commit config.
    if not dry_run:
        with sqla_session() as session:
            logger.info("Trying to acquire lock for devices to run syncto job: {}".format(job_id))
            if not Joblock.acquire_lock(session, name='devices', job_id=job_id):
                raise JoblockError("Unable to acquire lock for configuring devices")

    try:
        nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run,
                                   job_id=job_id)
    except Exception as e:
        logger.exception("Exception while synchronizing devices: {}".format(str(e)))
        try:
            if not dry_run:
                with sqla_session() as session:
                    logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
                    Joblock.release_lock(session, job_id=job_id)
        except Exception:
            logger.error("Unable to release devices lock after syncto job")
        # NOTE(review): nrresult is still the result of the hash check run
        # here, since the push run raised before it could be reassigned.
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())
    for hostname in failed_hosts:
        logger.error("Synchronization of device '{}' failed".format(hostname))

    if nrresult.failed:
        logger.error("Not all devices were successfully synchronized")

    change_scores = []
    changed_hosts = []
    unchanged_hosts = []
    # calculate change impact score
    for host, results in nrresult.items():
        # Exactly three results are expected per host; results[2] carries
        # the diff. Any other count is treated as a failed device.
        if len(results) != 3:
            logger.debug("Unable to calculate change score for failed device {}".format(host))
        elif results[2].diff:
            changed_hosts.append(host)
            if "change_score" in results[0].host:
                change_scores.append(results[0].host["change_score"])
                logger.debug("Change score for host {}: {:.1f}".format(
                    host, results[0].host["change_score"]))
        else:
            unchanged_hosts.append(host)
            change_scores.append(0)
            logger.debug("Empty diff for host {}, 0 change score".format(
                host))

    nr_confighash = None
    if dry_run and force:
        # update config hash for devices that had an empty diff because local
        # changes on a device can cause reordering of CLI commands that results
        # in config hash mismatch even if the calculated diff was empty
        def include_filter(host, include_list=unchanged_hosts):
            return host.name in include_list
        nr_confighash = nr_filtered.filter(filter_func=include_filter)
    elif not dry_run:
        # set new config hash for devices that was successfully updated
        def exclude_filter(host, exclude_list=failed_hosts+unchanged_hosts):
            return host.name not in exclude_list
        nr_confighash = nr_filtered.filter(filter_func=exclude_filter)

    if nr_confighash:
        try:
            nrresult_confighash = nr_confighash.run(task=update_config_hash)
        except Exception as e:
            logger.exception("Exception while updating config hashes: {}".format(str(e)))
        else:
            if nrresult_confighash.failed:
                logger.error("Unable to update some config hashes: {}".format(
                    list(nrresult_confighash.failed_hosts.keys())))

    # set devices as synchronized if needed
    try:
        with sqla_session() as session:
            for hostname in changed_hosts:
                dev: Device = session.query(Device).filter(Device.hostname == hostname).one()
                # A dry run that produced a diff means the device still
                # differs from its template; a live run has just pushed it.
                dev.synchronized = not dry_run
            for hostname in unchanged_hosts:
                dev = session.query(Device).filter(Device.hostname == hostname).one()
                dev.synchronized = True
            if not dry_run:
                logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
                Joblock.release_lock(session, job_id=job_id)
    except Exception:
        # Do not leave the devices lock held if the status update failed,
        # otherwise later syncto jobs could not acquire it. A fresh session
        # is used because the failed one can no longer be relied on.
        if not dry_run:
            try:
                with sqla_session() as session:
                    logger.info("Releasing lock for devices from syncto job: {}".format(job_id))
                    Joblock.release_lock(session, job_id=job_id)
            except Exception:
                logger.error("Unable to release devices lock after syncto job")
        raise

    if not device_list:
        total_change_score = 0
    elif not change_scores or failed_hosts:
        # No score could be calculated, or some device failed: assume the
        # highest possible impact.
        total_change_score = 100
    else:
        # use individual max as total_change_score, range 1-100
        total_change_score = max(min(int(max(change_scores) + 0.5), 100), 1)
    logger.info(
        "Change impact score: {:.1f} (dry_run: {}, selected devices: {}, changed devices: {})".
            format(total_change_score, dry_run, len(device_list), len(changed_hosts)))

    next_job_id = None
    # Auto-push only follows a dry run of exactly one device that was
    # selected explicitly by hostname.
    if auto_push and len(device_list) == 1 and hostnames and dry_run:
        if not changed_hosts:
            logger.info("None of the selected host has any changes (diff), skipping auto-push")
        elif total_change_score < AUTOPUSH_MAX_SCORE:
            scheduler = Scheduler()
            next_job_id = scheduler.add_onetime_job(
                'cnaas_nms.confpush.sync_devices:sync_devices',
                when=0,
                scheduled_by=scheduled_by,
                kwargs={'hostnames': hostnames, 'dry_run': False, 'force': force})
            logger.info(f"Auto-push scheduled live-run of commit as job id {next_job_id}")
        else:
            logger.info(
                f"Auto-push of config to device {hostnames} failed because change score of "
                f"{total_change_score} is higher than auto-push limit {AUTOPUSH_MAX_SCORE}"
            )

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job_id, change_score=total_change_score)
Example #13
0
def sync_devices(hostname: Optional[str] = None,
                 device_type: Optional[str] = None,
                 dry_run: bool = True,
                 force: bool = False) -> NornirJobResult:
    """Synchronize devices to their respective templates. If no arguments
    are specified then synchronize all devices that are currently out
    of sync.

    Args:
        hostname: Specify a single host by hostname to synchronize
        device_type: Specify a device type to synchronize
        dry_run: Passed on to the push_sync_device task; when False the
                 stored config hashes and synchronized flags are updated
                 for devices that did not fail
        force: Continue even if a device configuration has been altered
               outside of CNaaS

    Returns:
        NornirJobResult

    Raises:
        Exception: If a running config hash could not be fetched, or if a
            device was altered outside of CNaaS and force is False
    """
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    if hostname:
        nr_filtered = nr.filter(name=hostname).filter(managed=True)
    elif device_type:
        nr_filtered = nr.filter(F(groups__contains='T_' +
                                  device_type))  # device type
    else:
        nr_filtered = nr.filter(synchronized=False).filter(
            managed=True)  # all unsynchronized devices

    device_list = list(nr_filtered.inventory.hosts.keys())
    logger.info(
        "Device(s) selected for synchronization: {}".format(device_list))

    # Compare the stored config hash of each device with the hash of its
    # running config to detect changes made outside of CNaaS.
    altered_devices = []
    for device in device_list:
        stored_config_hash = Device.get_config_hash(device)
        if stored_config_hash is None:
            # No stored hash to compare against
            continue
        current_config_hash = get_running_config_hash(device)
        if current_config_hash is None:
            raise Exception('Failed to get configuration hash')
        if stored_config_hash != current_config_hash:
            logger.info(
                "Device {} configuration is altered outside of CNaaS!".format(
                    device))
            altered_devices.append(device)
    if altered_devices and force is False:
        raise Exception(
            'Configuration for {} is altered outside of CNaaS'.format(
                ', '.join(altered_devices)))

    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_sync_device, dry_run=dry_run)
        print_result(nrresult)
    except Exception as e:
        logger.exception("Exception while synchronizing devices: {}".format(
            str(e)))
        if nrresult is None:
            # The run itself failed so there is no result to hand back;
            # propagate the real error instead of failing on an unbound name.
            raise
        return NornirJobResult(nrresult=nrresult)

    failed_hosts = list(nrresult.failed_hosts.keys())

    if not dry_run:
        # Store the new running config hash for every device that succeeded
        for key in nrresult.keys():
            if key in failed_hosts:
                continue
            new_config_hash = get_running_config_hash(key)
            if new_config_hash is None:
                raise Exception('Failed to get configuration hash')
            Device.set_config_hash(key, new_config_hash)

        with sqla_session() as session:
            for synced_hostname in device_list:
                if synced_hostname in failed_hosts:
                    logger.error(
                        "Synchronization of device '{}' failed".format(
                            synced_hostname))
                    continue
                dev: Device = session.query(Device).filter(
                    Device.hostname == synced_hostname).one()
                dev.synchronized = True

    if nrresult.failed:
        logger.error("Not all devices were successfully synchronized")

    return NornirJobResult(nrresult=nrresult)
Example #14
0
def init_access_device_step1(device_id: int,
                             new_hostname: str) -> NornirJobResult:
    """Initialize access device for management by CNaaS-NMS

    Args:
        device_id: Database ID of the device to initialize
        new_hostname: Hostname to assign to the device

    Returns:
        Nornir result object

    Raises:
        DeviceStateException: If the device is not in state DISCOVERED
        ConnectionCheckError: If the connectivity check against the device
            fails
        RuntimeError: If pushing the base management config did not fail,
            since losing connectivity is the expected outcome
        Exception: If no management domain or no free management IP could
            be found
    """
    # Check that we can find device and that it's in the correct state to start init
    with sqla_session() as session:
        dev: Device = session.query(Device).filter(
            Device.id == device_id).one()
        if dev.state != DeviceState.DISCOVERED:
            raise DeviceStateException(
                "Device must be in state DISCOVERED to begin init")
        old_hostname = dev.hostname
    # Perform connectivity check
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_old_filtered = nr.filter(name=old_hostname)
    try:
        nrresult_old = nr_old_filtered.run(task=networking.napalm_get,
                                           getters=["facts"])
    except Exception as e:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}: {str(e)}") from e
    if nrresult_old.failed:
        raise ConnectionCheckError(
            f"Failed to connect to device_id {device_id}")

    cnaas_nms.confpush.get.update_linknets(old_hostname)
    uplinks = []
    neighbor_hostnames = []
    with sqla_session() as session:
        dev = session.query(Device).filter(
            Device.hostname == old_hostname).one()
        # Every link towards a DIST neighbor is treated as an uplink
        for neighbor_d in dev.get_neighbors(session):
            if neighbor_d.device_type == DeviceType.DIST:
                local_if = dev.get_link_to_local_ifname(session, neighbor_d)
                if local_if:
                    uplinks.append({'ifname': local_if})
                    neighbor_hostnames.append(neighbor_d.hostname)
        logger.debug("Uplinks for device {} detected: {} neighbor_hostnames: {}".\
                     format(device_id, uplinks, neighbor_hostnames))
        # TODO: check compatibility, same dist pair and same ports on dists
        mgmtdomain = cnaas_nms.db.helper.find_mgmtdomain(
            session, neighbor_hostnames)
        if not mgmtdomain:
            raise Exception(
                "Could not find appropriate management domain for uplink peer devices: {}"
                .format(neighbor_hostnames))
        mgmt_ip = mgmtdomain.find_free_mgmt_ip(session)
        if not mgmt_ip:
            raise Exception(
                "Could not find free management IP for management domain {}".
                format(mgmtdomain.id))
        mgmt_gw_ipif = IPv4Interface(mgmtdomain.ipv4_gw)
        device_variables = {
            # The new management address reuses the gateway's prefix length
            'mgmt_ipif':
            IPv4Interface('{}/{}'.format(mgmt_ip,
                                         mgmt_gw_ipif.network.prefixlen)),
            'uplinks':
            uplinks,
            'mgmt_vlan_id':
            mgmtdomain.vlan,
            'mgmt_gw':
            mgmt_gw_ipif.ip
        }
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.state = DeviceState.INIT
        dev.hostname = new_hostname
        session.commit()
        hostname = dev.hostname

    # Re-initialize so the device can be selected by its new hostname
    nr = cnaas_nms.confpush.nornir_helper.cnaas_init()
    nr_filtered = nr.filter(name=hostname)

    # step2. push management config
    nrresult = None
    try:
        nrresult = nr_filtered.run(task=push_base_management,
                                   device_variables=device_variables)
    except Exception as e:
        # Ignore the exception, we expect to lose connectivity.
        # Sometimes we get no exception here, but it's saved in result;
        # other times we get socket.timeout, pyeapi.eapilib.ConnectionError or
        # napalm.base.exceptions.ConnectionException to handle here?
        logger.debug(
            "Exception while pushing base management config to device_id {}: {}".
            format(device_id, str(e)))
    if nrresult is not None:
        if not nrresult.failed:
            # we don't expect success here
            raise RuntimeError(
                "Push of base management config to device_id {} did not fail as expected".
                format(device_id))
        print_result(nrresult)
    # NOTE(review): when run() itself raised, nrresult stays None and is
    # passed on to NornirJobResult below - confirm that None is accepted.

    with sqla_session() as session:
        dev = session.query(Device).filter(Device.id == device_id).one()
        dev.management_ip = device_variables['mgmt_ipif'].ip

    # step3. register apscheduler job that continues steps

    scheduler = Scheduler()
    next_job = scheduler.add_onetime_job(
        'cnaas_nms.confpush.init_device:init_access_device_step2',
        when=0,
        kwargs={
            'device_id': device_id,
            'iteration': 1
        })

    logger.debug(f"Step 2 scheduled as ID {next_job.id}")

    return NornirJobResult(nrresult=nrresult, next_job_id=next_job.id)