Ejemplo n.º 1
0
    def run(self, instance, private_data_dir, playbook, module, module_args,
            event_data_key, ident=None):
        """
        Run a job on an isolated host.

        :param instance:         a `model.Job` instance
        :param private_data_dir: an absolute path on the local file system
                                 where job-specific data should be written
                                 (i.e., `/tmp/awx_N_xyz/`)
        :param playbook:         the playbook to run
        :param module:           the module to run
        :param module_args:      the module args to use
        :param event_data_key:   e.g., job_id, inventory_id, ...

        For a completed job run, this function returns (status, rc),
        representing the status and return code of the isolated
        `ansible-playbook` run.
        """
        # Stash the run context on self so dispatch()/check() can use it.
        self.ident = ident
        self.event_data_key = event_data_key
        self.instance = instance
        self.private_data_dir = private_data_dir
        verbosity = min(5, self.instance.verbosity)
        self.runner_params = self.build_runner_params(
            [instance.execution_node], verbosity=verbosity)

        status, rc = self.dispatch(playbook, module, module_args)
        if status != 'successful':
            # The launch itself failed; emit a terminating EOF event so event
            # consumers know no further events will arrive for this job.
            eof_event = {'event': 'EOF', 'final_counter': 0}
            eof_event.setdefault(self.event_data_key, self.instance.id)
            CallbackQueueDispatcher().dispatch(eof_event)
            return status, rc
        # Launch succeeded — poll the isolated node until the job completes.
        return self.check()
Ejemplo n.º 2
0
    def wrap_stdout_handle(instance,
                           private_data_dir,
                           stdout_handle,
                           event_data_key='job_id'):
        """
        Wrap *stdout_handle* in an OutputEventFilter whose callback enriches
        each parsed event with its partial-event artifact data (when present)
        and forwards the event to the callback queue dispatcher.
        """
        dispatcher = CallbackQueueDispatcher()

        def job_event_callback(event_data):
            # Tag the event with the owning object's id (e.g. job_id).
            event_data.setdefault(event_data_key, instance.id)
            if 'uuid' in event_data:
                # Merge in the per-event partial data written by the runner
                # callback plugin, keyed by the event uuid.
                partial_filename = os.path.join(
                    private_data_dir, 'artifacts', 'job_events',
                    '{}-partial.json'.format(event_data['uuid']))
                try:
                    with codecs.open(partial_filename, 'r',
                                     encoding='utf-8') as f:
                        event_data.update(json.load(f))
                except IOError:
                    # 'verbose' events legitimately have no partial file;
                    # anything else missing its data is worth an error.
                    if event_data.get('event', '') != 'verbose':
                        logger.error(
                            'Missing callback data for event type `{}`, uuid {}, job {}.\nevent_data: {}'
                            .format(event_data.get('event', ''),
                                    event_data['uuid'], instance.id,
                                    event_data))
            dispatcher.dispatch(event_data)

        return OutputEventFilter(stdout_handle, job_event_callback)
Ejemplo n.º 3
0
    def dispatch(self, playbook=None, module=None, module_args=None):
        """
        Ship the runner payload to a remote host for isolated execution.

        Returns (status, rc) of the `run_isolated.yml` launch playbook.
        """
        self.handled_events = set()
        self.started_at = time.time()

        self.build_isolated_job_data()

        # Variables consumed by run_isolated.yml on the isolated host.
        extra_vars = {
            'src': self.private_data_dir,
            'dest': settings.AWX_PROOT_BASE_PATH,
            'ident': self.ident,
        }
        if playbook:
            extra_vars['playbook'] = playbook
        if module and module_args:
            extra_vars['module'] = module
            extra_vars['module_args'] = module_args

        # Run ansible-playbook to launch a job on the isolated host.  This:
        #
        # - sets up a temporary directory for proot/bwrap (if necessary)
        # - copies encrypted job data from the controlling host to the
        #   isolated host (with rsync)
        # - writes the encryption secret to a named pipe on the isolated host
        # - launches ansible-runner
        args = self._build_args('run_isolated.yml', '%s,' % self.host,
                                extra_vars)
        if self.instance.verbosity:
            args.append('-%s' % ('v' * min(5, self.instance.verbosity)))

        buff = StringIO()
        logger.debug(
            'Starting job {} on isolated host with `run_isolated.yml` playbook.'
            .format(self.instance.id))
        status, rc = IsolatedManager.run_pexpect(
            args,
            self.awx_playbook_path(),
            self.management_env,
            buff,
            idle_timeout=self.idle_timeout,
            job_timeout=settings.AWX_ISOLATED_LAUNCH_TIMEOUT,
            pexpect_timeout=5)
        output = buff.getvalue()
        playbook_logger.info('Isolated job {} dispatch:\n{}'.format(
            self.instance.id, output))

        if status != 'successful':
            # Surface the launch output as a 'verbose' event, then close the
            # event stream with an EOF so consumers stop waiting.
            failure_events = [
                {'event': 'verbose', 'stdout': output},
                {'event': 'EOF', 'final_counter': 1},
            ]
            for event_data in failure_events:
                event_data.setdefault(self.event_data_key, self.instance.id)
                CallbackQueueDispatcher().dispatch(event_data)
        return status, rc
Ejemplo n.º 4
0
 def __init__(self, model=None):
     """Initialize per-run callback bookkeeping state for *model*."""
     self.model = model
     self.parent_workflow_job_id = None
     self.job_created = None
     self.host_map = {}
     self.safe_env = {}
     self.event_ct = 0
     # Correlation id propagated from the request that spawned this run.
     self.guid = GuidMiddleware.get_guid()
     # Rolling window of recent event timestamps, used to rate-limit
     # websocket event emission.
     self.recent_event_timings = deque(maxlen=settings.MAX_WEBSOCKET_EVENT_RATE)
     self.dispatcher = CallbackQueueDispatcher()
Ejemplo n.º 5
0
 def __init__(self, model=None):
     """Initialize per-run callback bookkeeping state for *model*."""
     self.model = model
     self.parent_workflow_job_id = None
     self.job_created = None
     self.host_map = {}
     self.safe_env = {}
     self.event_ct = 0
     # Correlation id propagated from the request that spawned this run.
     self.guid = get_guid()
     # Rolling window of recent event timestamps, used to rate-limit
     # websocket event emission.
     self.recent_event_timings = deque(maxlen=settings.MAX_WEBSOCKET_EVENT_RATE)
     self.dispatcher = CallbackQueueDispatcher()
     # How many 5-second retries fit inside the configured DB downtime
     # tolerance (setting name spelled as declared in settings — sic).
     self.update_attempts = int(settings.DISPATCHER_DB_DOWNTOWN_TOLLERANCE / 5)
Ejemplo n.º 6
0
    def check(self, interval=None):
        """
        Repeatedly poll the isolated node to determine if the job has run.

        On success, copy job artifacts to the controlling node.
        On failure, continue to poll the isolated node (until the job timeout
        is exceeded).

        For a completed job run, this function returns (status, rc),
        representing the status and return code of the isolated
        `ansible-playbook` run.

        :param interval: an interval (in seconds) to wait between status polls
        """
        interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
        extravars = {'src': self.private_data_dir}
        status = 'failed'
        rc = None
        last_check = time.time()
        dispatcher = CallbackQueueDispatcher()
        # `check_isolated.yml` reports 'failed' while the remote job is still
        # running, so keep polling until it reports anything else.
        while status == 'failed':
            canceled = self.cancelled_callback() if self.cancelled_callback else False
            if not canceled and time.time() - last_check < interval:
                # If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer
                time.sleep(1)
                continue

            if canceled:
                logger.warning('Isolated job {} was manually cancelled.'.format(self.instance.id))

            logger.debug('Checking on isolated job {} with `check_isolated.yml`.'.format(self.instance.id))
            runner_obj = self.run_management_playbook('check_isolated.yml',
                                                      self.private_data_dir,
                                                      extravars=extravars)
            status, rc = runner_obj.status, runner_obj.rc

            # One-shot capture of the `command` artifact: once it has been
            # rsynced back, pass its parsed JSON to check_callback and never
            # re-read it on subsequent polls.
            if self.check_callback is not None and not self.captured_command_artifact:
                command_path = self.path_to('artifacts', self.ident, 'command')
                # If the configuration artifact has been synced back, update the model
                if os.path.exists(command_path):
                    try:
                        with open(command_path, 'r') as f:
                            data = json.load(f)
                        self.check_callback(data)
                        self.captured_command_artifact = True
                    except json.decoder.JSONDecodeError:  # Just in case it's not fully here yet.
                        pass

            # Ingest any job events that were rsynced back on this poll.
            self.consume_events(dispatcher)

            last_check = time.time()

        if status == 'successful':
            # The remote runner writes its final status and return code into
            # artifact files; read them back on the controlling node.
            status_path = self.path_to('artifacts', self.ident, 'status')
            rc_path = self.path_to('artifacts', self.ident, 'rc')
            if os.path.exists(status_path):
                with open(status_path, 'r') as f:
                    status = f.readline()
                with open(rc_path, 'r') as f:
                    rc = int(f.readline())
            else:
                # if there's no status file, it means that runner _probably_
                # exited with a traceback (which should be logged to
                # daemon.log)  Record it so we can see how runner failed.
                daemon_path = self.path_to('daemon.log')
                if os.path.exists(daemon_path):
                    with open(daemon_path, 'r') as f:
                        self.instance.result_traceback = f.read()
                        self.instance.save(update_fields=['result_traceback'])
                else:
                    logger.error('Failed to rsync daemon.log (is ansible-runner installed on the isolated host?)')
                status = 'failed'
                rc = 1

        # consume events one last time just to be sure we didn't miss anything
        # in the final sync
        self.consume_events(dispatcher)

        # emit an EOF event
        event_data = {
            'event': 'EOF',
            'final_counter': len(self.handled_events)
        }
        event_data.setdefault(self.event_data_key, self.instance.id)
        dispatcher.dispatch(event_data)

        return status, rc
Ejemplo n.º 7
0
    def check(self, interval=None):
        """
        Repeatedly poll the isolated node to determine if the job has run.

        On success, copy job artifacts to the controlling node.
        On failure, continue to poll the isolated node (until the job timeout
        is exceeded).

        For a completed job run, this function returns (status, rc),
        representing the status and return code of the isolated
        `ansible-playbook` run.

        :param interval: an interval (in seconds) to wait between status polls
        """
        interval = interval if interval is not None else settings.AWX_ISOLATED_CHECK_INTERVAL
        extra_vars = {'src': self.private_data_dir}
        args = self._build_args('check_isolated.yml', '%s,' % self.host,
                                extra_vars)
        if self.instance.verbosity:
            args.append('-%s' % ('v' * min(5, self.instance.verbosity)))

        status = 'failed'
        rc = None
        buff = StringIO()
        last_check = time.time()
        job_timeout = remaining = self.job_timeout
        dispatcher = CallbackQueueDispatcher()
        # `check_isolated.yml` reports 'failed' while the remote job is still
        # running, so keep polling until it reports anything else.
        while status == 'failed':
            # job_timeout == 0 means "no timeout"; otherwise enforce the
            # overall deadline measured from dispatch time.
            if job_timeout != 0:
                remaining = max(0,
                                job_timeout - (time.time() - self.started_at))
                if remaining == 0:
                    # if it takes longer than $REMAINING_JOB_TIMEOUT to retrieve
                    # job artifacts from the host, consider the job failed
                    if isinstance(self.extra_update_fields, dict):
                        self.extra_update_fields[
                            'job_explanation'] = "Job terminated due to timeout"
                    status = 'failed'
                    break

            canceled = self.cancelled_callback(
            ) if self.cancelled_callback else False
            if not canceled and time.time() - last_check < interval:
                # If the job isn't cancelled, but we haven't waited `interval` seconds, wait longer
                time.sleep(1)
                continue

            buff = StringIO()
            logger.debug(
                'Checking on isolated job {} with `check_isolated.yml`.'.
                format(self.instance.id))
            status, rc = IsolatedManager.run_pexpect(
                args,
                self.awx_playbook_path(),
                self.management_env,
                buff,
                cancelled_callback=self.cancelled_callback,
                idle_timeout=remaining,
                job_timeout=remaining,
                pexpect_timeout=5,
                proot_cmd=self.proot_cmd)
            output = buff.getvalue().encode('utf-8')
            playbook_logger.info('Isolated job {} check:\n{}'.format(
                self.instance.id, output))

            # discover new events and ingest them
            events_path = self.path_to('artifacts', self.ident, 'job_events')

            # it's possible that `events_path` doesn't exist *yet*, because runner
            # hasn't actually written any events yet (if you ran e.g., a sleep 30)
            # only attempt to consume events if any were rsynced back
            if os.path.exists(events_path):
                for event in set(
                        os.listdir(events_path)) - self.handled_events:
                    path = os.path.join(events_path, event)
                    if os.path.exists(path):
                        # BUGFIX: use a context manager so the event file is
                        # closed promptly; the previous json.load(open(...))
                        # leaked one file descriptor per event.
                        with open(path, 'r') as f:
                            event_data = json.load(f)
                        event_data.setdefault(self.event_data_key,
                                              self.instance.id)
                        dispatcher.dispatch(event_data)
                        self.handled_events.add(event)

                        # handle artifacts
                        if event_data.get('event_data',
                                          {}).get('artifact_data', {}):
                            self.instance.artifacts = event_data['event_data'][
                                'artifact_data']
                            self.instance.save(update_fields=['artifacts'])

            last_check = time.time()

        if status == 'successful':
            # The remote runner writes its final status and return code into
            # artifact files; read them back on the controlling node.
            status_path = self.path_to('artifacts', self.ident, 'status')
            rc_path = self.path_to('artifacts', self.ident, 'rc')
            with open(status_path, 'r') as f:
                status = f.readline()
            with open(rc_path, 'r') as f:
                rc = int(f.readline())

        # emit an EOF event
        event_data = {
            'event': 'EOF',
            'final_counter': len(self.handled_events)
        }
        event_data.setdefault(self.event_data_key, self.instance.id)
        dispatcher.dispatch(event_data)

        return status, rc
Ejemplo n.º 8
0
    def check(self, interval=None):
        """
        Repeatedly poll the isolated node to determine if the job has run.

        On success, copy job artifacts to the controlling node.
        On failure, continue to poll the isolated node (until the job timeout
        is exceeded).

        For a completed job run, this function returns (status, rc),
        representing the status and return code of the isolated
        `ansible-playbook` run.

        :param interval: an interval (in seconds) to wait between status polls
        """
        if interval is None:
            interval = settings.AWX_ISOLATED_CHECK_INTERVAL
        extravars = {'src': self.private_data_dir}
        dispatcher = CallbackQueueDispatcher()
        status = 'failed'
        rc = None
        last_check = time.time()
        # `check_isolated.yml` reports 'failed' while the remote job is still
        # running, so keep polling until it reports anything else.
        while status == 'failed':
            if self.cancelled_callback:
                canceled = self.cancelled_callback()
            else:
                canceled = False
            if not canceled and (time.time() - last_check) < interval:
                # Not cancelled and the poll interval hasn't elapsed — wait.
                time.sleep(1)
                continue

            if canceled:
                logger.warning(
                    'Isolated job {} was manually cancelled.'.format(
                        self.instance.id))

            logger.debug(
                'Checking on isolated job {} with `check_isolated.yml`.'.
                format(self.instance.id))
            runner_obj = self.run_management_playbook(
                'check_isolated.yml', self.private_data_dir,
                extravars=extravars)
            status, rc = runner_obj.status, runner_obj.rc
            # Ingest any job events that were rsynced back on this poll.
            self.consume_events(dispatcher)

            last_check = time.time()

        if status == 'successful':
            # The remote runner writes its final status and return code into
            # artifact files; read them back on the controlling node.
            with open(self.path_to('artifacts', self.ident, 'status'), 'r') as f:
                status = f.readline()
            with open(self.path_to('artifacts', self.ident, 'rc'), 'r') as f:
                rc = int(f.readline())

        # consume events one last time just to be sure we didn't miss anything
        # in the final sync
        self.consume_events(dispatcher)

        # emit an EOF event
        eof = {'event': 'EOF', 'final_counter': len(self.handled_events)}
        eof.setdefault(self.event_data_key, self.instance.id)
        dispatcher.dispatch(eof)

        return status, rc