Exemple #1
0
def get_app():
    """Prepare background services and return the application object.

    Steps performed, in order:
      * start a local ``Scheduler`` unless ``uwsgi`` is importable,
      * load plugins through ``PluginManagerHandler``,
      * best-effort cleanup of old job locks and of jobs with invalid
        states (failures are printed, never raised).

    Returns:
        The ``app.app`` object.
    """
    from cnaas_nms.scheduler.scheduler import Scheduler
    from cnaas_nms.plugins.pluginmanager import PluginManagerHandler
    from cnaas_nms.db.session import sqla_session
    from cnaas_nms.db.joblock import Joblock
    from cnaas_nms.db.job import Job

    # If running inside uwsgi, a separate "mule" will run the scheduler,
    # so a scheduler is only started here when uwsgi is unavailable.
    try:
        import uwsgi  # noqa: F401 (imported only to detect uwsgi)
    except ImportError:
        local_scheduler = Scheduler()
        local_scheduler.start()
    else:
        print("Running inside uwsgi")

    plugin_handler = PluginManagerHandler()
    plugin_handler.load_plugins()

    # Startup cleanup is best-effort: report the problem and carry on.
    try:
        with sqla_session() as session:
            Joblock.clear_locks(session)
    except Exception as exc:
        print(f"Unable to clear old locks from database at startup: {exc!s}")

    try:
        with sqla_session() as session:
            Job.clear_jobs(session)
    except Exception as exc:
        print(f"Unable to clear jobs with invalid states: {exc!s}")

    return app.app
Exemple #2
0
def refresh_repo(repo_type: RepoType = RepoType.TEMPLATES,
                 scheduled_by: str = None) -> str:
    """Refresh the repository for repo_type

    A Job entry is created for the refresh and the 'devices' lock is held
    while the refresh runs. The lock is released in a ``finally`` clause so
    it is given back on every exit path, including non-``Exception``
    interruptions of the refresh task.

    Args:
        repo_type: Which repository to refresh
        scheduled_by: Passed to ``Job.start_job`` as the scheduler of the job

    Returns:
        String describing what was updated.

    Raises:
        cnaas_nms.db.settings.SettingsSyntaxError
        cnaas_nms.db.joblock.JoblockError
    """
    # Acquire lock for devices to make sure no one refreshes the repository
    # while another task is building configuration for devices using repo data
    with sqla_session() as session:
        job = Job()
        job.start_job(function_name="refresh_repo", scheduled_by=scheduled_by)
        session.add(job)
        # Flush so the database assigns job.id before it is used for the lock
        session.flush()
        job_id = job.id

        logger.info(
            "Trying to acquire lock for devices to run refresh repo: {}".
            format(job_id))
        if not Joblock.acquire_lock(session, name='devices', job_id=job_id):
            raise JoblockError(
                "Unable to acquire lock for configuring devices")
        try:
            result = _refresh_repo_task(repo_type)
            job.finish_time = datetime.datetime.utcnow()
            job.status = JobStatus.FINISHED
            job.result = {"message": result, "repository": repo_type.name}
            return result
        except Exception as e:
            logger.exception(
                "Exception while scheduling job for refresh repo: {}".format(
                    str(e)))
            job.finish_time = datetime.datetime.utcnow()
            job.status = JobStatus.EXCEPTION
            job.result = {"error": str(e), "repository": repo_type.name}
            raise
        finally:
            # Releasing is best-effort: a failure here is logged but must not
            # mask the result or the original exception.
            try:
                logger.info(
                    "Releasing lock for devices from refresh repo job: {}".
                    format(job_id))
                Joblock.release_lock(session, job_id=job_id)
            except Exception:
                logger.error(
                    "Unable to release devices lock after refresh repo job")
Exemple #3
0
    def add_onetime_job(self,
                        func: Union[str, FunctionType],
                        when: Optional[int] = None,
                        scheduled_by: Optional[str] = None,
                        **kwargs) -> int:
        """Register a one-off job and hand it to the scheduler.

        A Job row is created first to obtain the job id. The job is then
        either sent as a JSON message to the uwsgi mule (when
        ``self.use_mule`` is set) or added to the local scheduler.

        Args:
            func: The function to call
            when: Optional number of seconds to wait before starting job
            scheduled_by: Forwarded to the job in kwargs as 'scheduled_by'
            **kwargs: Arguments to pass through to called function
        Returns:
            int: job_id
        """
        trigger = None
        run_date = None
        if when and isinstance(when, int):
            trigger = 'date'
            delay = datetime.timedelta(seconds=when)
            run_date = datetime.datetime.utcnow() + delay

        with sqla_session() as session:
            new_job = Job()
            if run_date:
                new_job.scheduled_time = run_date
            session.add(new_job)
            # Flush so the database assigns the id before it is read
            session.flush()
            job_id = new_job.id

        kwargs['job_id'] = job_id
        kwargs['scheduled_by'] = scheduled_by

        if not self.use_mule:
            self._scheduler.add_job(func,
                                    trigger=trigger,
                                    kwargs=kwargs,
                                    id=str(job_id),
                                    run_date=run_date)
            return job_id

        try:
            import uwsgi
        except Exception as e:
            logger.exception("use_mule is set but not running in uwsgi")
            raise e

        if isinstance(func, FunctionType):
            func_ref = str(func.__qualname__)
        else:
            func_ref = str(func)
        # Key order matches the original message layout: the pass-through
        # kwargs first, then the scheduling metadata.
        mule_message = {
            **kwargs,
            'func': func_ref,
            'trigger': trigger,
            'when': when,
            'id': str(job_id),
        }
        uwsgi.mule_msg(json.dumps(mule_message))
        return job_id
Exemple #4
0
    def post(self, hostname: str):
        """Restore configuration to previous version

        Reads ``job_id`` (required, integer) and ``dry_run`` (optional) from
        the JSON request body, looks up the configuration stored for that
        job via ``Job.get_previous_config`` and schedules an ``apply_config``
        job for the device. The job runs as a dry run unless ``dry_run`` is
        explicitly the boolean ``false``.

        Args:
            hostname: Hostname of the device to restore

        Returns:
            Tuple of (result dict, HTTP status code). On success the result
            includes the ``job_id`` of the scheduled restore job.
        """
        json_data = request.get_json()
        # A missing or non-object JSON body is treated as an empty object so
        # the request is rejected with a 400 below instead of failing with a
        # TypeError on the membership test.
        if not isinstance(json_data, dict):
            json_data = {}
        apply_kwargs = {'hostname': hostname}
        config = None
        if not Device.valid_hostname(hostname):
            return empty_result(status='error',
                                data="Invalid hostname specified"), 400

        if 'job_id' not in json_data:
            return empty_result('error', "job_id must be specified"), 400
        try:
            job_id = int(json_data['job_id'])
        except (TypeError, ValueError, OverflowError):
            return empty_result('error', "job_id must be an integer"), 400

        with sqla_session() as session:
            try:
                prev_config_result = Job.get_previous_config(session,
                                                             hostname,
                                                             job_id=job_id)
                failed = prev_config_result['failed']
                if not failed and 'config' in prev_config_result:
                    config = prev_config_result['config']
            except JobNotFoundError as e:
                return empty_result('error', str(e)), 404
            except InvalidJobError as e:
                return empty_result('error', str(e)), 500
            except Exception as e:
                return empty_result('error',
                                    "Unhandled exception: {}".format(e)), 500

        if failed:
            return empty_result(
                'error', "The specified job_id has a failed status"), 400

        if not config:
            return empty_result('error', "No config found in this job"), 500

        # Default to a dry run; only an explicit boolean false disables it.
        apply_kwargs['dry_run'] = json_data.get('dry_run') is not False
        apply_kwargs['config'] = config

        scheduler = Scheduler()
        job_id = scheduler.add_onetime_job(
            'cnaas_nms.confpush.sync_devices:apply_config',
            when=1,
            scheduled_by=get_jwt_identity(),
            kwargs=apply_kwargs,
        )

        res = empty_result(data=f"Scheduled job to restore {hostname}")
        res['job_id'] = job_id

        return res, 200
Exemple #5
0
def arista_pre_flight_check(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task that checks free flash space before an upgrade and removes
    old firmware images when space is low.

    Args:
        task: NorNir task
        job_id: Job id used for thread data and for the abort check

    Returns:
        String, describing the result

    Raises:
        Exception: If the free space check does not return exactly one result

    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Pre-flight aborted"

    diskspace_cmd = "bash timeout 5 df /mnt/flash | awk '{print $4}'"
    cleanup_cmd = 'bash timeout 30 ls -t /mnt/flash/*.swi | tail -n +2 | grep -v `cut -d"/" -f2 /mnt/flash/boot-config` | xargs rm -f'

    # Ask the device how much flash space is available
    space_res = task.run(napalm_cli, commands=[diskspace_cmd])
    single_answer = (isinstance(space_res, MultiResult)
                     and len(space_res.result.keys()) == 1)
    if not single_answer:
        raise Exception('Could not check free space')

    # The value is read from the second line of the command output
    df_output = next(iter(space_res.result.values()))
    free_bytes = df_output.split('\n')[1]
    if int(free_bytes) >= 2500000:
        logger.info('Enough free space ({}b), no cleanup'.format(free_bytes))
    else:
        # Remove old firmware images to make room
        logger.info('Cleaning up old firmware images on {}'.format(
            task.host.name))
        task.run(napalm_cli, commands=[cleanup_cmd])

    return "Pre-flight check done."
Exemple #6
0
def arista_post_flight_check(task,
                             post_waittime: int,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task that records the new OS version of a device after an upgrade.

    Sleeps for post_waittime seconds, then fetches device facts and stores
    the reported OS version on the Device row. If the version is unchanged
    the activation is treated as failed.

    Args:
        task: NorNir task
        post_waittime: Time to wait before trying to gather facts
        job_id: Job id used for thread data and for the abort check

    Returns:
        String, describing the result (failures are reported in the
        returned string rather than raised)

    """
    set_thread_data(job_id)
    logger = get_logger()
    time.sleep(int(post_waittime))
    hostname = task.host.name
    logger.info(
        'Post-flight check wait ({}s) complete, starting check for {}'.format(
            post_waittime, hostname))
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Post-flight aborted"

    try:
        facts_res = task.run(napalm_get, getters=["facts"])
        os_version = facts_res[0].result['facts']['os_version']

        with sqla_session() as session:
            device_query = session.query(Device).filter(
                Device.hostname == hostname)
            dev: Device = device_query.one()
            prev_os_version = dev.os_version
            dev.os_version = os_version
            if prev_os_version == os_version:
                logger.error(
                    "OS version did not change, activation failed on {}".
                    format(hostname))
                raise Exception("OS version did not change, activation failed")
            # New OS version: clear the config hash and mark unsynchronized
            dev.confhash = None
            dev.synchronized = False
    except Exception as e:
        error_text = str(e)
        logger.exception("Could not update OS version on device {}: {}".format(
            hostname, error_text))
        return 'Post-flight failed, could not update OS version: {}'.format(
            error_text)

    return "Post-flight, OS version updated from {} to {}.".format(
        prev_os_version, os_version)
Exemple #7
0
    def get(self, hostname: str):
        """Return a previous configuration for a device.

        At most one selector from the query string is used, checked in this
        order: ``job_id`` (integer), ``previous`` (integer), ``before``
        (ISO format date time string). The parsed selector is forwarded to
        ``Job.get_previous_config``.

        Args:
            hostname: Hostname of the device

        Returns:
            The result dict on success, or a tuple of (error dict, HTTP
            status code) on failure.
        """
        args = request.args
        result = empty_result()
        result['data'] = {'config': None}
        if not Device.valid_hostname(hostname):
            return empty_result(status='error',
                                data="Invalid hostname specified"), 400

        # (query parameter, parser, message returned when parsing fails)
        selectors = (
            ('job_id', int, "job_id must be an integer"),
            ('previous', int, "previous must be an integer"),
            # wrapped so the lookup happens only when 'before' is parsed
            ('before',
             lambda raw: datetime.datetime.fromisoformat(raw),
             "before must be a valid ISO format date time string"),
        )
        kwargs = {}
        for param, parse, error_message in selectors:
            if param not in args:
                continue
            try:
                kwargs[param] = parse(args[param])
            except Exception:
                return empty_result('error', error_message), 400
            # Only the first selector present is honored
            break

        with sqla_session() as session:
            try:
                previous_config = Job.get_previous_config(
                    session, hostname, **kwargs)
                result['data'] = previous_config
            except JobNotFoundError as e:
                return empty_result('error', str(e)), 404
            except InvalidJobError as e:
                return empty_result('error', str(e)), 500
            except Exception as e:
                return empty_result('error',
                                    "Unhandled exception: {}".format(e)), 500

        return result
Exemple #8
0
def arista_device_reboot(task, job_id: Optional[str] = None) -> str:
    """
    NorNir task that saves the configuration and reloads a single device.

    Args:
        task: NorNir task.
        job_id: Job id used for thread data and for the abort check

    Returns:
        String, describing the result

    Raises:
        Exception: Re-raised after logging if any command fails

    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Reboot aborted"

    # Commands are sent in this order: enable, write, then reload.
    reboot_sequence = (
        {'command_string': 'enable', 'expect_string': '.*#'},
        {'command_string': 'write', 'expect_string': '.*#'},
        {'command_string': 'reload force', 'max_loops': 2,
         'expect_string': '.*'},
    )

    try:
        for send_kwargs in reboot_sequence:
            task.run(netmiko_send_command, **send_kwargs)
    except Exception as e:
        logger.exception('Failed to reboot switch {}: {}'.format(
            task.host.name, str(e)))
        raise e

    return "Device reboot done."
Exemple #9
0
    def add_onetime_job(self,
                        func: Union[str, FunctionType],
                        when: Optional[int] = None,
                        scheduled_by: Optional[str] = None,
                        **kwargs) -> int:
        """Schedule a job to run at a later time on the mule worker or
        local scheduler depending on setup.

        Arguments for the called function are expected in a nested dict
        passed as the keyword argument ``kwargs``. Some extra checks against
        that dict are performed here. If it has a truthy 'dry_run' entry,
        (dry_run) is appended to the function name stored on the job. If it
        contains 'job_comment' or 'job_ticket_ref', those entries are removed
        from the dict and stored on the job instead (truncated to 255 and 32
        characters). When no nested ``kwargs`` dict is given, these steps are
        skipped.

        Args:
            func: The function to call
            when: Optional number of seconds to wait before starting job
            scheduled_by: Username that scheduled the job, 'unknown' if None
            **kwargs: Arguments to pass through to called function
        Returns:
            int: job_id
        Raises:
            TypeError: If kwargs are not JSON serializable
        """
        if when and isinstance(when, int):
            trigger = 'date'
            run_date = datetime.datetime.utcnow() + datetime.timedelta(
                seconds=when)
        else:
            trigger = None
            run_date = None

        if isinstance(func, FunctionType):
            func_qualname = str(func.__qualname__)
        else:
            func_qualname = str(func)
        # Keep only the part after the last ':' as the job's function name
        func_name = func_qualname.split(':')[-1]

        try:
            json.dumps(kwargs)
        except TypeError as e:
            raise TypeError(
                "Job args must be JSON serializable: {}".format(e)) from e

        # Arguments meant for the called function; None when the caller did
        # not supply a nested dict.
        func_kwargs = kwargs.get('kwargs')
        if not isinstance(func_kwargs, dict):
            func_kwargs = None

        # Append (dry_run) to function name if set, so we can distinguish dry_run jobs
        if func_kwargs and func_kwargs.get('dry_run'):
            func_name += " (dry_run)"

        with sqla_session() as session:
            job = Job()
            if run_date:
                job.scheduled_time = run_date
            job.function_name = func_name
            if scheduled_by is None:
                scheduled_by = 'unknown'
            job.scheduled_by = scheduled_by
            if func_kwargs is not None:
                # Comment and ticket reference are job metadata, not
                # arguments for the called function, so they are popped.
                job_comment = func_kwargs.pop('job_comment', None)
                if job_comment and isinstance(job_comment, str):
                    job.comment = job_comment[:255]
                job_ticket_ref = func_kwargs.pop('job_ticket_ref', None)
                if job_ticket_ref and isinstance(job_ticket_ref, str):
                    job.ticket_ref = job_ticket_ref[:32]
                job.start_arguments = func_kwargs
            session.add(job)
            # Flush so the database assigns the id before it is read
            session.flush()
            job_id = job.id

        kwargs['job_id'] = job_id
        kwargs['scheduled_by'] = scheduled_by
        if self.use_mule:
            try:
                import uwsgi
            except Exception:
                logger.exception("use_mule is set but not running in uwsgi")
                raise
            args = dict(kwargs)
            args['func'] = func_qualname
            args['trigger'] = trigger
            args['when'] = when
            args['id'] = str(job_id)
            uwsgi.mule_msg(json.dumps(args))
            return job_id
        else:
            self.add_local_job(func,
                               trigger=trigger,
                               kwargs=kwargs,
                               id=str(job_id),
                               run_date=run_date,
                               name=func_qualname)
            return job_id
Exemple #10
0
def arista_firmware_activate(task,
                             filename: str,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task that points the boot config at a new firmware image and
    verifies that the change took effect.

    Args:
        task: NorNir task
        filename: Name of the new firmware image
        job_id: Job id used for thread data and for the abort check

    Returns:
        String, describing the result

    Raises:
        FirmwareAlreadyActiveException: If boot-config already references
            the image
        Exception: For any other failure, after logging it

    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Firmware activate aborted"

    def send(command, **netmiko_kwargs):
        """Run one CLI command on the device through netmiko."""
        return task.run(netmiko_send_command,
                        command_string=command,
                        **netmiko_kwargs)

    try:
        boot_file_cmd = 'boot system flash:{}'.format(filename)
        show_boot_cmd = 'show boot-config | grep -o "\\w*{}\\w*"'.format(
            filename)

        send('enable', expect_string='.*#')

        # Stop early if the image is already configured for boot
        current_boot = send(show_boot_cmd)
        if current_boot.result == filename:
            raise FirmwareAlreadyActiveException(
                'Firmware already activated in boot-config on {}'.format(
                    task.host.name))

        send('conf t', expect_string='.*config.*#')
        send(boot_file_cmd)
        send('end', expect_string='.*#')

        # Read the boot config back to confirm the change
        verify_boot = send(show_boot_cmd)

        if not isinstance(verify_boot, MultiResult):
            raise Exception('Could not check boot-config on {}'.format(
                task.host.name))

        if verify_boot.result != filename:
            raise Exception('Firmware not activated properly on {}'.format(
                task.host.name))
    except FirmwareAlreadyActiveException:
        # Passed through untouched so callers can tell this case apart
        raise
    except Exception as e:
        logger.exception('Failed to activate firmware on {}: {}'.format(
            task.host.name, str(e)))
        raise Exception('Failed to activate firmware')

    return "Firmware activate done."
Exemple #11
0
def arista_firmware_download(task,
                             filename: str,
                             httpd_url: str,
                             job_id: Optional[str] = None) -> str:
    """
    NorNir task that copies a firmware image from the HTTP server to the
    device flash.

    Args:
        task: NorNir task
        filename: Name of the file to download
        httpd_url: Base URL to the HTTP server
        job_id: Job id used for thread data and for the abort check

    Returns:
        String, describing the result

    Raises:
        Exception: If the copy fails or does not report success

    """
    set_thread_data(job_id)
    logger = get_logger()
    with sqla_session() as session:
        if Job.check_job_abort_status(session, job_id):
            return "Firmware download aborted"

    url = httpd_url + '/' + filename
    # Make sure netmiko doesn't use fast_cli because it will change delay_factor
    # that is set in task.run below and cause early timeouts
    net_connect = task.host.get_connection("netmiko", task.nornir.config)
    net_connect.fast_cli = False

    try:
        with sqla_session() as session:
            device_query = session.query(Device).filter(
                Device.hostname == task.host.name)
            dev: Device = device_query.one_or_none()
            device_type = dev.device_type

        # Access devices copy without the MGMT vrf, all others use it
        if device_type == DeviceType.ACCESS:
            copy_template = 'copy {} flash:'
        else:
            copy_template = 'copy {} vrf MGMT flash:'
        firmware_download_cmd = copy_template.format(url)

        res = task.run(netmiko_send_command,
                       command_string=firmware_download_cmd.replace("//", "/"),
                       enable=True,
                       delay_factor=30,
                       max_loops=200)

        if 'Copy completed successfully' not in res.result:
            logger.debug("Firmware download failed on {} ('{}'): {}".format(
                task.host.name, firmware_download_cmd, res.result))
            # Only the output lines starting with 'get:' go in the error
            get_lines = [
                line for line in res.result.splitlines()
                if line.startswith('get:')
            ]
            raise Exception(
                "Copy command did not complete successfully: {}".format(
                    ', '.join(get_lines)))

    except NornirSubTaskError as e:
        subtask_result = e.result[0]
        logger.error('{} failed to download firmware: {}'.format(
            task.host.name, subtask_result))
        logger.debug('{} download subtask result: {}'.format(
            task.host.name, subtask_result.result))
        raise Exception(
            'Failed to download firmware: {}'.format(subtask_result))
    except Exception as e:
        logger.error('{} failed to download firmware: {}'.format(
            task.host.name, e))
        raise Exception('Failed to download firmware: {}'.format(e))

    return "Firmware download done."