Ejemplo n.º 1
0
def emit_job_event_detail(sender, **kwargs):
    instance = kwargs['instance']
    created = kwargs['created']
    if created:
        event_serialized = JobEventWebSocketSerializer(instance).data
        emit_channel_notification('job_events-' + str(instance.job.id),
                                  event_serialized)
Ejemplo n.º 2
0
    def update_computed_fields(self):
        future_rs = Schedule.rrulestr(self.rrule)
        next_run_actual = future_rs.after(now())

        if next_run_actual is not None:
            if not datetime_exists(next_run_actual):
                # skip imaginary dates, like 2:30 on DST boundaries
                next_run_actual = future_rs.after(next_run_actual)
            next_run_actual = next_run_actual.astimezone(pytz.utc)

        self.next_run = next_run_actual
        try:
            self.dtstart = future_rs[0].astimezone(pytz.utc)
        except IndexError:
            self.dtstart = None
        self.dtend = None
        if 'until' in self.rrule.lower() or 'count' in self.rrule.lower():
            try:
                self.dtend = future_rs[-1].astimezone(pytz.utc)
            except IndexError:
                self.dtend = None
        emit_channel_notification('schedules-changed',
                                  dict(id=self.id, group_name='schedules'))
        with ignore_inventory_computed_fields():
            self.unified_job_template.update_computed_fields()
Ejemplo n.º 3
0
 def send_metrics(self):
     # more than one thread could be calling this at the same time, so should
     # acquire redis lock before sending metrics
     lock = self.conn.lock(root_key + '_lock')
     if not lock.acquire(blocking=False):
         return
     try:
         current_time = time.time()
         if current_time - self.previous_send_metrics.decode(
                 self.conn) > self.send_metrics_interval:
             payload = {
                 'instance': self.instance_name,
                 'metrics': self.serialize_local_metrics(),
             }
             # store a local copy as well
             self.store_metrics(json.dumps(payload))
             emit_channel_notification("metrics", payload)
             self.previous_send_metrics.set(current_time)
             self.previous_send_metrics.store_value(self.conn)
     finally:
         try:
             lock.release()
         except Exception as exc:
             # After system failures, we might throw redis.exceptions.LockNotOwnedError
             # this is to avoid print a Traceback, and importantly, avoid raising an exception into parent context
             logger.warning(
                 f'Error releasing subsystem metrics redis lock, error: {str(exc)}'
             )
Ejemplo n.º 4
0
Archivo: system.py Proyecto: mahak/awx
def delete_inventory(inventory_id, user_id, retries=5):
    # Delete inventory as user
    if user_id is None:
        user = None
    else:
        try:
            user = User.objects.get(id=user_id)
        except Exception:
            user = None
    with ignore_inventory_computed_fields(), ignore_inventory_group_removal(), impersonate(user):
        try:
            i = Inventory.objects.get(id=inventory_id)
            for host in i.hosts.iterator():
                host.job_events_as_primary_host.update(host=None)
            i.delete()
            emit_channel_notification('inventories-status_changed', {'group_name': 'inventories', 'inventory_id': inventory_id, 'status': 'deleted'})
            logger.debug('Deleted inventory {} as user {}.'.format(inventory_id, user_id))
        except Inventory.DoesNotExist:
            logger.exception("Delete Inventory failed due to missing inventory: " + str(inventory_id))
            return
        except DatabaseError:
            logger.exception('Database error deleting inventory {}, but will retry.'.format(inventory_id))
            if retries > 0:
                time.sleep(10)
                delete_inventory(inventory_id, user_id, retries=retries - 1)
Ejemplo n.º 5
0
def save_user_session_membership(sender, **kwargs):
    session = kwargs.get('instance', None)
    if pkg_resources.get_distribution('channels').version >= '2':
        # If you get into this code block, it means we upgraded channels, but
        # didn't make the settings.SESSIONS_PER_USER feature work
        raise RuntimeError(
            'save_user_session_membership must be updated for channels>=2: '
            'http://channels.readthedocs.io/en/latest/one-to-two.html#requirements'
        )
    if 'runworker' in sys.argv:
        # don't track user session membership for websocket per-channel sessions
        return
    if not session:
        return
    user = session.get_decoded().get(SESSION_KEY, None)
    if not user:
        return
    user = User.objects.get(pk=user)
    if UserSessionMembership.objects.filter(user=user,
                                            session=session).exists():
        return
    UserSessionMembership(user=user, session=session,
                          created=timezone.now()).save()
    expired = UserSessionMembership.get_memberships_over_limit(user)
    for membership in expired:
        Session.objects.filter(
            session_key__in=[membership.session_id]).delete()
        membership.delete()
    if len(expired):
        consumers.emit_channel_notification(
            'control-limit_reached_{}'.format(user.pk),
            dict(group_name='control', reason=unicode(_('limit_reached'))))
Ejemplo n.º 6
0
def emit_ad_hoc_command_event_detail(sender, **kwargs):
    instance = kwargs['instance']
    created = kwargs['created']
    if created:
        event_serialized = AdHocCommandEventWebSocketSerializer(instance).data
        emit_channel_notification(
            'ad_hoc_command_events-' + str(instance.ad_hoc_command_id),
            event_serialized)
Ejemplo n.º 7
0
def emit_event_detail(serializer, relation, **kwargs):
    instance = kwargs['instance']
    created = kwargs['created']
    if created:
        event_serializer = serializer(instance)
        consumers.emit_channel_notification(
            '-'.join([event_serializer.get_group_name(instance), str(getattr(instance, relation))]),
            event_serializer.data
        )
Ejemplo n.º 8
0
Archivo: system.py Proyecto: mahak/awx
def awx_periodic_scheduler():
    with advisory_lock('awx_periodic_scheduler_lock', wait=False) as acquired:
        if acquired is False:
            logger.debug("Not running periodic scheduler, another task holds lock")
            return
        logger.debug("Starting periodic scheduler")

        run_now = now()
        state = TowerScheduleState.get_solo()
        last_run = state.schedule_last_run
        logger.debug("Last scheduler run was: %s", last_run)
        state.schedule_last_run = run_now
        state.save()

        old_schedules = Schedule.objects.enabled().before(last_run)
        for schedule in old_schedules:
            schedule.update_computed_fields()
        schedules = Schedule.objects.enabled().between(last_run, run_now)

        invalid_license = False
        try:
            access_registry[Job](None).check_license(quiet=True)
        except PermissionDenied as e:
            invalid_license = e

        for schedule in schedules:
            template = schedule.unified_job_template
            schedule.update_computed_fields()  # To update next_run timestamp.
            if template.cache_timeout_blocked:
                logger.warning("Cache timeout is in the future, bypassing schedule for template %s" % str(template.id))
                continue
            try:
                job_kwargs = schedule.get_job_kwargs()
                new_unified_job = schedule.unified_job_template.create_unified_job(**job_kwargs)
                logger.debug('Spawned {} from schedule {}-{}.'.format(new_unified_job.log_format, schedule.name, schedule.pk))

                if invalid_license:
                    new_unified_job.status = 'failed'
                    new_unified_job.job_explanation = str(invalid_license)
                    new_unified_job.save(update_fields=['status', 'job_explanation'])
                    new_unified_job.websocket_emit_status("failed")
                    raise invalid_license
                can_start = new_unified_job.signal_start()
            except Exception:
                logger.exception('Error spawning scheduled job.')
                continue
            if not can_start:
                new_unified_job.status = 'failed'
                new_unified_job.job_explanation = gettext_noop(
                    "Scheduled job could not start because it \
                    was not in the right state or required manual credentials"
                )
                new_unified_job.save(update_fields=['status', 'job_explanation'])
                new_unified_job.websocket_emit_status("failed")
            emit_channel_notification('schedules-changed', dict(id=schedule.id, group_name="schedules"))
        state.save()
Ejemplo n.º 9
0
def emit_job_event_detail(sender, **kwargs):
    instance = kwargs['instance']
    created = kwargs['created']
    if created:
        event_serialized = JobEventSerializer(instance).data
        event_serialized['id'] = instance.id
        event_serialized["created"] = event_serialized["created"].isoformat()
        event_serialized["modified"] = event_serialized["modified"].isoformat()
        event_serialized["event_name"] = instance.event
        event_serialized["group_name"] = "job_events"
        emit_channel_notification('job_events-' + str(instance.job.id), event_serialized)
Ejemplo n.º 10
0
def emit_ad_hoc_command_event_detail(sender, **kwargs):
    instance = kwargs['instance']
    created = kwargs['created']
    if created:
        event_serialized = AdHocCommandEventSerializer(instance).data
        event_serialized['id'] = instance.id
        event_serialized["created"] = event_serialized["created"].isoformat()
        event_serialized["modified"] = event_serialized["modified"].isoformat()
        event_serialized["event_name"] = instance.event
        event_serialized["group_name"] = "ad_hoc_command_events"
        emit_channel_notification('ad_hoc_command_events-' + str(instance.ad_hoc_command_id), event_serialized)
Ejemplo n.º 11
0
 def update_computed_fields(self):
     changed = self.update_computed_fields_no_save()
     if not changed:
         return
     emit_channel_notification('schedules-changed', dict(id=self.id, group_name='schedules'))
     # Must save self here before calling unified_job_template computed fields
     # in order for that method to be correct
     # by adding modified to update fields, we avoid updating modified time
     super(Schedule, self).save(update_fields=['next_run', 'dtstart', 'dtend', 'modified'])
     with ignore_inventory_computed_fields():
         self.unified_job_template.update_computed_fields()
Ejemplo n.º 12
0
def emit_event_detail(event):
    if (
        settings.UI_LIVE_UPDATES_ENABLED is False and
        event.event not in MINIMAL_EVENTS
    ):
        return
    cls = event.__class__
    relation = {
        JobEvent: 'job_id',
        AdHocCommandEvent: 'ad_hoc_command_id',
        ProjectUpdateEvent: 'project_update_id',
        InventoryUpdateEvent: 'inventory_update_id',
        SystemJobEvent: 'system_job_id',
    }[cls]
    url = ''
    if isinstance(event, JobEvent):
        url = '/api/v2/job_events/{}'.format(event.id)
    if isinstance(event, AdHocCommandEvent):
        url = '/api/v2/ad_hoc_command_events/{}'.format(event.id)
    group = camelcase_to_underscore(cls.__name__) + 's'
    timestamp = event.created.isoformat()
    consumers.emit_channel_notification(
        '-'.join([group, str(getattr(event, relation))]),
        {
            'id': event.id,
            relation.replace('_id', ''): getattr(event, relation),
            'created': timestamp,
            'modified': timestamp,
            'group_name': group,
            'url': url,
            'stdout': event.stdout,
            'counter': event.counter,
            'uuid': event.uuid,
            'parent_uuid': getattr(event, 'parent_uuid', ''),
            'start_line': event.start_line,
            'end_line': event.end_line,
            'event': event.event,
            'event_data': getattr(event, 'event_data', {}),
            'failed': event.failed,
            'changed': event.changed,
            'event_level': getattr(event, 'event_level', ''),
            'play': getattr(event, 'play', ''),
            'role': getattr(event, 'role', ''),
            'task': getattr(event, 'task', ''),
        }
    )
Ejemplo n.º 13
0
    def _websocket_emit_status(self, status):
        try:
            status_data = dict(unified_job_id=self.id, status=status)
            if status == 'waiting':
                if self.instance_group:
                    status_data['instance_group_name'] = self.instance_group.name
                else:
                    status_data['instance_group_name'] = None
            status_data.update(self.websocket_emit_data())
            status_data['group_name'] = 'jobs'
            emit_channel_notification('jobs-status_changed', status_data)

            if self.spawned_by_workflow:
                status_data['group_name'] = "workflow_events"
                emit_channel_notification('workflow_events-' + str(self.workflow_job_id), status_data)
        except IOError:  # includes socket errors
            logger.exception('%s failed to emit channel msg about status change', self.log_format)
Ejemplo n.º 14
0
def save_user_session_membership(sender, **kwargs):
    session = kwargs.get('instance', None)
    if not session:
        return
    user_id = session.get_decoded().get(SESSION_KEY, None)
    if not user_id:
        return
    if UserSessionMembership.objects.filter(user=user_id, session=session).exists():
        return
    # check if user_id from session has an id match in User before saving
    if User.objects.filter(id=int(user_id)).exists():
        UserSessionMembership(user_id=user_id, session=session, created=timezone.now()).save()
        expired = UserSessionMembership.get_memberships_over_limit(user_id)
        for membership in expired:
            Session.objects.filter(session_key__in=[membership.session_id]).delete()
            membership.delete()
        if len(expired):
            consumers.emit_channel_notification('control-limit_reached_{}'.format(user_id), dict(group_name='control', reason='limit_reached'))
Ejemplo n.º 15
0
    def update_computed_fields(self):
        future_rs = dateutil.rrule.rrulestr(self.rrule, forceset=True)
        next_run_actual = future_rs.after(now())

        self.next_run = next_run_actual
        try:
            self.dtstart = future_rs[0]
        except IndexError:
            self.dtstart = None
        self.dtend = None
        if 'until' in self.rrule.lower():
            match_until = re.match(".*?(UNTIL\=[0-9]+T[0-9]+Z)", self.rrule)
            until_date = match_until.groups()[0].split("=")[1]
            self.dtend = make_aware(datetime.datetime.strptime(until_date, "%Y%m%dT%H%M%SZ"), get_default_timezone())
        if 'count' in self.rrule.lower():
            self.dtend = future_rs[-1]
        emit_channel_notification('schedules-changed', dict(id=self.id, group_name='schedules'))
        with ignore_inventory_computed_fields():
            self.unified_job_template.update_computed_fields()
Ejemplo n.º 16
0
def save_user_session_membership(sender, **kwargs):
    session = kwargs.get('instance', None)
    if not session:
        return
    user = session.get_decoded().get(SESSION_KEY, None)
    if not user:
        return
    user = User.objects.get(pk=user)
    if UserSessionMembership.objects.filter(user=user,
                                            session=session).exists():
        return
    UserSessionMembership.objects.create(user=user,
                                         session=session,
                                         created=timezone.now())
    for membership in UserSessionMembership.get_memberships_over_limit(user):
        consumers.emit_channel_notification(
            'control-limit_reached',
            dict(group_name='control',
                 reason=unicode(_('limit_reached')),
                 session_key=membership.session.session_key))
 def send_metrics(self):
     # more than one thread could be calling this at the same time, so should
     # get acquire redis lock before sending metrics
     lock = self.conn.lock(root_key + '_lock', thread_local=False)
     if not lock.acquire(blocking=False):
         return
     try:
         current_time = time.time()
         if current_time - self.previous_send_metrics.decode(self.conn) > self.send_metrics_interval:
             payload = {
                 'instance': self.instance_name,
                 'metrics': self.serialize_local_metrics(),
             }
             # store a local copy as well
             self.store_metrics(json.dumps(payload))
             emit_channel_notification("metrics", payload)
             self.previous_send_metrics.set(current_time)
             self.previous_send_metrics.store_value(self.conn)
     finally:
         lock.release()
Ejemplo n.º 18
0
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                job_identifier = 'unknown job'
                for key, cls in event_map.items():
                    if key in body:
                        job_identifier = body[key]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            GuidMiddleware.set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Event processing is finished for Job {}, sending notifications'
                            .format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)

                        if isinstance(uj, Job):
                            # *actual playbooks* send their success/failure
                            # notifications in response to the playbook_on_stats
                            # event handling code in main.models.events
                            pass
                        elif hasattr(uj, 'send_notification_templates'):
                            handle_success_and_failure_notifications.apply_async(
                                [uj.id])
                    except Exception:
                        logger.exception(
                            'Worker failed to emit notifications: Job {}'.
                            format(job_identifier))
                    finally:
                        self.subsystem_metrics.inc(
                            'callback_receiver_events_in_memory', -1)
                        GuidMiddleware.set_guid('')
                    return

                skip_websocket_message = body.pop('skip_websocket_message',
                                                  False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:
                    event._skip_websocket_message = True

                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
Ejemplo n.º 19
0
    def run(self, job_id, speed=1.0, verbosity=0):
        stats = {
            'events_ontime': {
                'total': 0,
                'percentage': 0,
            },
            'events_late': {
                'total': 0,
                'percentage': 0,
                'lateness_total': 0,
                'lateness_average': 0,
            },
            'events_total': 0,
            'events_distance_total': 0,
            'events_distance_average': 0,
            'recording_start': 0,
            'recording_end': 0,
            'recording_duration': 0,
            'replay_start': 0,
            'replay_end': 0,
            'replay_duration': 0,
        }
        try:
            job = self.get_job(job_id)
            job_events = self.get_job_events(job)
            serializer = self.get_serializer(job)
        except RuntimeError as e:
            print("{}".format(e.message))
            sys.exit(1)

        je_previous = None
        for je_current in job_events:
            if not je_previous:
                stats['recording_start'] = je_current.created
                self.start(je_current.created)
                stats['replay_start'] = self.replay_start
                je_previous = je_current

            je_serialized = serializer(je_current).data
            emit_channel_notification(
                '{}-{}'.format(je_serialized['group_name'], job.id),
                je_serialized)

            replay_offset = self.replay_offset(je_previous.created, speed)
            recording_diff = (je_current.created - je_previous.created
                              ).total_seconds() * (1.0 / speed)
            stats['events_distance_total'] += recording_diff
            if verbosity >= 3:
                print(
                    "recording: next job in {} seconds".format(recording_diff))
            if replay_offset >= 0:
                replay_diff = recording_diff - replay_offset

                if replay_diff > 0:
                    stats['events_ontime']['total'] += 1
                    if verbosity >= 3:
                        print("\treplay: sleep for {} seconds".format(
                            replay_diff))
                    self.sleep(replay_diff)
                else:
                    stats['events_late']['total'] += 1
                    stats['events_late']['lateness_total'] += (replay_diff *
                                                               -1)
                    if verbosity >= 3:
                        print("\treplay: too far behind to sleep {} seconds".
                              format(replay_diff))
            else:
                replay_offset = self.replay_offset(je_current.created, speed)
                stats['events_late']['lateness_total'] += (replay_offset * -1)
                stats['events_late']['total'] += 1
                if verbosity >= 3:
                    print(
                        "\treplay: behind by {} seconds".format(replay_offset))

            stats['events_total'] += 1
            je_previous = je_current

        stats['replay_end'] = self.now()
        stats['replay_duration'] = (stats['replay_end'] -
                                    stats['replay_start']).total_seconds()
        stats['replay_start'] = stats['replay_start'].isoformat()
        stats['replay_end'] = stats['replay_end'].isoformat()

        stats['recording_end'] = je_current.created
        stats['recording_duration'] = (
            stats['recording_end'] - stats['recording_start']).total_seconds()
        stats['recording_start'] = stats['recording_start'].isoformat()
        stats['recording_end'] = stats['recording_end'].isoformat()

        stats['events_ontime']['percentage'] = (
            stats['events_ontime']['total'] /
            float(stats['events_total'])) * 100.00
        stats['events_late']['percentage'] = (
            stats['events_late']['total'] /
            float(stats['events_total'])) * 100.00
        stats['events_distance_average'] = stats[
            'events_distance_total'] / stats['events_total']
        stats['events_late']['lateness_average'] = stats['events_late'][
            'lateness_total'] / stats['events_late']['total']
        if verbosity >= 2:
            print(json.dumps(stats, indent=4, sort_keys=True))
Ejemplo n.º 20
0
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                job_identifier = 'unknown job'
                for cls in (JobEvent, AdHocCommandEvent, ProjectUpdateEvent,
                            InventoryUpdateEvent, SystemJobEvent):
                    if cls.JOB_REFERENCE in body:
                        job_identifier = body[cls.JOB_REFERENCE]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                notification_trigger_event = bool(
                    body.get('event') == cls.WRAPUP_EVENT)

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Starting EOF event processing for Job {}'.format(
                                job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))

                        if notification_trigger_event:
                            job_stats_wrapup(job_identifier)
                    except Exception:
                        logger.exception(
                            'Worker failed to perform EOF tasks: Job {}'.
                            format(job_identifier))
                    finally:
                        self.subsystem_metrics.inc(
                            'callback_receiver_events_in_memory', -1)
                        set_guid('')
                    return

                skip_websocket_message = body.pop('skip_websocket_message',
                                                  False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:  # if this event sends websocket messages, fire them off on flush
                    event._skip_websocket_message = True

                if notification_trigger_event:  # if this is an Ansible stats event, ensure notifications on flush
                    event._notification_trigger_event = True

                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
Ejemplo n.º 21
0
    def run(self,
            job_id,
            speed=1.0,
            verbosity=0,
            skip_range=[],
            random_seed=0,
            final_status_delay=0,
            debug=False):
        stats = {
            'events_ontime': {
                'total': 0,
                'percentage': 0,
            },
            'events_late': {
                'total': 0,
                'percentage': 0,
                'lateness_total': 0,
                'lateness_average': 0,
            },
            'events_total': 0,
            'events_distance_total': 0,
            'events_distance_average': 0,
            'recording_start': 0,
            'recording_end': 0,
            'recording_duration': 0,
            'replay_start': 0,
            'replay_end': 0,
            'replay_duration': 0,
        }
        try:
            job = self.get_job(job_id)
            job_events, job_event_count = self.get_job_events(job)
            serializer = self.get_serializer(job)
        except RuntimeError as e:
            print("{}".format(e.message))
            sys.exit(1)

        je_previous = None

        self.emit_job_status(job, 'pending')
        self.emit_job_status(job, 'waiting')
        self.emit_job_status(job, 'running')

        finish_status_index = self.determine_job_event_finish_status_index(
            job_event_count, random_seed)

        for n, je_current in enumerate(job_events):
            if je_current.counter in skip_range:
                continue

            if debug:
                raw_input("{} of {}:".format(n, job_event_count))

            if not je_previous:
                stats['recording_start'] = je_current.created
                self.start(je_current.created)
                stats['replay_start'] = self.replay_start
                je_previous = je_current

            je_serialized = serializer(je_current).data
            emit_channel_notification(
                '{}-{}'.format(je_serialized['group_name'], job.id),
                je_serialized)

            replay_offset = self.replay_offset(je_previous.created, speed)
            recording_diff = (je_current.created - je_previous.created
                              ).total_seconds() * (1.0 / speed)
            stats['events_distance_total'] += recording_diff
            if verbosity >= 3:
                print(
                    "recording: next job in {} seconds".format(recording_diff))
            if replay_offset >= 0:
                replay_diff = recording_diff - replay_offset

                if replay_diff > 0:
                    stats['events_ontime']['total'] += 1
                    if verbosity >= 3:
                        print("\treplay: sleep for {} seconds".format(
                            replay_diff))
                    self.sleep(replay_diff)
                else:
                    stats['events_late']['total'] += 1
                    stats['events_late']['lateness_total'] += (replay_diff *
                                                               -1)
                    if verbosity >= 3:
                        print("\treplay: too far behind to sleep {} seconds".
                              format(replay_diff))
            else:
                replay_offset = self.replay_offset(je_current.created, speed)
                stats['events_late']['lateness_total'] += (replay_offset * -1)
                stats['events_late']['total'] += 1
                if verbosity >= 3:
                    print(
                        "\treplay: behind by {} seconds".format(replay_offset))

            stats['events_total'] += 1
            je_previous = je_current

            if n == finish_status_index:
                if final_status_delay != 0:
                    self.sleep(final_status_delay)
                self.emit_job_status(job, job.status)

        if stats['events_total'] > 2:
            stats['replay_end'] = self.now()
            stats['replay_duration'] = (stats['replay_end'] -
                                        stats['replay_start']).total_seconds()
            stats['replay_start'] = stats['replay_start'].isoformat()
            stats['replay_end'] = stats['replay_end'].isoformat()

            stats['recording_end'] = je_current.created
            stats['recording_duration'] = (
                stats['recording_end'] -
                stats['recording_start']).total_seconds()
            stats['recording_start'] = stats['recording_start'].isoformat()
            stats['recording_end'] = stats['recording_end'].isoformat()

            stats['events_ontime']['percentage'] = (
                stats['events_ontime']['total'] /
                float(stats['events_total'])) * 100.00
            stats['events_late']['percentage'] = (
                stats['events_late']['total'] /
                float(stats['events_total'])) * 100.00
            stats['events_distance_average'] = stats[
                'events_distance_total'] / stats['events_total']
            stats['events_late']['lateness_average'] = stats['events_late'][
                'lateness_total'] / stats['events_late']['total']
        else:
            stats = {'events_total': stats['events_total']}

        if verbosity >= 2:
            print(json.dumps(stats, indent=4, sort_keys=True))
Ejemplo n.º 22
0
    def callback_worker(self, queue_actual, idx):
        signal_handler = WorkerSignalHandler()
        while not signal_handler.kill_now:
            try:
                body = queue_actual.get(block=True, timeout=1)
            except QueueEmpty:
                continue
            except Exception as e:
                logger.error("Exception on worker thread, restarting: " +
                             str(e))
                continue
            try:

                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                if not any([key in body for key in event_map]):
                    raise Exception('Payload does not have a job identifier')
                if settings.DEBUG:
                    from pygments import highlight
                    from pygments.lexers import PythonLexer
                    from pygments.formatters import Terminal256Formatter
                    from pprint import pformat
                    logger.info('Body: {}'.format(
                        highlight(pformat(body, width=160), PythonLexer(),
                                  Terminal256Formatter(
                                      style='friendly')))[:1024 * 4])

                def _save_event_data():
                    for key, cls in event_map.items():
                        if key in body:
                            cls.create_from_data(**body)

                job_identifier = 'unknown job'
                for key in event_map.keys():
                    if key in body:
                        job_identifier = body[key]
                        break

                if body.get('event') == 'EOF':
                    try:
                        logger.info(
                            'Event processing is finished for Job {}, sending notifications'
                            .format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)
                        if hasattr(uj, 'send_notification_templates'):
                            retries = 0
                            while retries < 5:
                                if uj.finished:
                                    uj.send_notification_templates(
                                        'succeeded' if uj.status ==
                                        'successful' else 'failed')
                                    break
                                else:
                                    # wait a few seconds to avoid a race where the
                                    # events are persisted _before_ the UJ.status
                                    # changes from running -> successful
                                    retries += 1
                                    time.sleep(1)
                                    uj = UnifiedJob.objects.get(
                                        pk=job_identifier)
                    except Exception:
                        logger.exception(
                            'Worker failed to emit notifications: Job {}'.
                            format(job_identifier))
                    continue

                retries = 0
                while retries <= self.MAX_RETRIES:
                    try:
                        _save_event_data()
                        break
                    except (OperationalError, InterfaceError,
                            InternalError) as e:
                        if retries >= self.MAX_RETRIES:
                            logger.exception(
                                'Worker could not re-establish database connectivity, shutting down gracefully: Job {}'
                                .format(job_identifier))
                            os.kill(os.getppid(), signal.SIGINT)
                            return
                        delay = 60 * retries
                        logger.exception(
                            'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                            .format(i=retries + 1, delay=delay))
                        django_connection.close()
                        time.sleep(delay)
                        retries += 1
                    except DatabaseError as e:
                        logger.exception(
                            'Database Error Saving Job Event for Job {}'.
                            format(job_identifier))
                        break
            except Exception as exc:
                import traceback
                tb = traceback.format_exc()
                logger.error('Callback Task Processor Raised Exception: %r',
                             exc)
                logger.error('Detail: {}'.format(tb))
Ejemplo n.º 23
0
    def perform_work(self, body):
        try:
            flush = body.get('event') == 'FLUSH'
            if not flush:
                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                job_identifier = 'unknown job'
                for key, cls in event_map.items():
                    if key in body:
                        job_identifier = body[key]
                        break

                if body.get('event') == 'EOF':
                    try:
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Event processing is finished for Job {}, sending notifications'
                            .format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)
                        if hasattr(uj, 'send_notification_templates'):
                            retries = 0
                            while retries < 5:
                                if uj.finished:
                                    uj.send_notification_templates(
                                        'succeeded' if uj.status ==
                                        'successful' else 'failed')
                                    break
                                else:
                                    # wait a few seconds to avoid a race where the
                                    # events are persisted _before_ the UJ.status
                                    # changes from running -> successful
                                    retries += 1
                                    time.sleep(1)
                                    uj = UnifiedJob.objects.get(
                                        pk=job_identifier)
                    except Exception:
                        logger.exception(
                            'Worker failed to emit notifications: Job {}'.
                            format(job_identifier))
                    return

                event = cls.create_from_data(**body)
                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))