Example #1
 def flush(self, force=False):
     now = tz_now()
     if (force
             or any([len(events) >= 1000
                     for events in self.buff.values()])):
         for cls, events in self.buff.items():
             logger.debug(
                 f'{cls.__name__}.objects.bulk_create({len(events)})')
             for e in events:
                 if not e.created:
                     e.created = now
                 e.modified = now
             try:
                 cls.objects.bulk_create(events)
             except Exception as exc:
                 # if an exception occurs, we should re-attempt to save the
                 # events one-by-one, because something in the list is
                 # broken/stale (e.g., an IntegrityError on a specific event)
                 for e in events:
                     try:
                         if (isinstance(exc, IntegrityError)
                                 and getattr(e, 'host_id', '')):
                             # this is one potential IntegrityError we can
                             # work around - if the host disappears before
                             # the event can be processed
                             e.host_id = None
                         e.save()
                     except Exception:
                         logger.exception('Database Error Saving Job Event')
             for e in events:
                 emit_event_detail(e)
         self.buff = {}
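
The pattern above is a buffered writer: events accumulate per model class, get bulk-inserted once a batch fills up, and fall back to row-by-row saves when one bad row poisons the whole batch. Below is a minimal, framework-free sketch of that idea, not AWX's actual interface; EventBuffer, the bulk_create callable, and the threshold default are illustrative stand-ins for the Django ORM pieces.

    import logging

    logger = logging.getLogger(__name__)


    class EventBuffer:
        """Sketch: buffer events per class, bulk-insert, retry row-by-row on failure."""

        def __init__(self, bulk_create, threshold=1000):
            self.bulk_create = bulk_create  # callable(cls, events); may raise on bad rows
            self.threshold = threshold
            self.buff = {}

        def add(self, cls, event):
            self.buff.setdefault(cls, []).append(event)

        def flush(self, force=False):
            if not (force or any(len(v) >= self.threshold for v in self.buff.values())):
                return
            for cls, events in self.buff.items():
                try:
                    self.bulk_create(cls, events)
                except Exception:
                    # one broken/stale row fails the whole batch; save each event
                    # individually so the rest still reach the database
                    for e in events:
                        try:
                            self.bulk_create(cls, [e])
                        except Exception:
                            logger.exception('error saving event')
            self.buff = {}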
Example #2
 def flush(self, force=False):
     now = tz_now()
     if (force
             or any([len(events) >= 1000
                     for events in self.buff.values()])):
         for cls, events in self.buff.items():
             logger.debug(
                 f'{cls.__name__}.objects.bulk_create({len(events)})')
             for e in events:
                 if not e.created:
                     e.created = now
                 e.modified = now
             try:
                 cls.objects.bulk_create(events)
             except Exception:
                 # if an exception occurs, we should re-attempt to save the
                 # events one-by-one, because something in the list is
                 # broken/stale
                 for e in events:
                     try:
                         e.save()
                     except Exception:
                         logger.exception('Database Error Saving Job Event')
             for e in events:
                 emit_event_detail(e)
         self.buff = {}
Example #3
 def flush(self, force=False):
     now = tz_now()
     if (force
             or (time.time() - self.last_flush) > settings.JOB_EVENT_BUFFER_SECONDS
             or any(len(events) >= 1000 for events in self.buff.values())):
         bulk_events_saved = 0
         singular_events_saved = 0
         metrics_events_batch_save_errors = 0
         for cls, events in self.buff.items():
             logger.debug(
                 f'{cls.__name__}.objects.bulk_create({len(events)})')
             for e in events:
                 if not e.created:
                     e.created = now
                 e.modified = now
             duration_to_save = time.perf_counter()
             try:
                 cls.objects.bulk_create(events)
                 bulk_events_saved += len(events)
             except Exception:
                 # if an exception occurs, we should re-attempt to save the
                 # events one-by-one, because something in the list is
                 # broken/stale
                 metrics_events_batch_save_errors += 1
                 for e in events:
                     try:
                         e.save()
                         singular_events_saved += 1
                     except Exception:
                         logger.exception('Database Error Saving Job Event')
             duration_to_save = time.perf_counter() - duration_to_save
             for e in events:
                 if not getattr(e, '_skip_websocket_message', False):
                     emit_event_detail(e)
         self.buff = {}
         self.last_flush = time.time()
         # only update metrics if we saved events
         if (bulk_events_saved + singular_events_saved) > 0:
             self.subsystem_metrics.inc(
                 'callback_receiver_batch_events_errors',
                 metrics_events_batch_save_errors)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_insert_db_seconds',
                 duration_to_save)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_insert_db',
                 bulk_events_saved + singular_events_saved)
             self.subsystem_metrics.observe(
                 'callback_receiver_batch_events_insert_db',
                 bulk_events_saved)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_in_memory',
                 -(bulk_events_saved + singular_events_saved))
         if self.subsystem_metrics.should_pipe_execute() is True:
             self.subsystem_metrics.pipe_execute()
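
This variant adds a time-based trigger (last_flush plus settings.JOB_EVENT_BUFFER_SECONDS) alongside the batch-size trigger, and counts what it saved. One hedged guess at how a consumer loop might drive such a flush follows; the queue protocol and the timeout default are assumptions, and buf can be any object exposing add()/flush() like the sketch after Example #1.

    import queue

    FLUSH_SECONDS = 0.1  # assumed value; AWX reads settings.JOB_EVENT_BUFFER_SECONDS

    def consume(q, buf, stop_event):
        """Hypothetical driver loop: size-based flushes happen inside flush();
        an idle timeout forces out whatever is still buffered."""
        while not stop_event.is_set():
            try:
                cls, event = q.get(timeout=FLUSH_SECONDS)
                buf.add(cls, event)
                buf.flush()  # no-op unless a per-class batch hit the threshold
            except queue.Empty:
                buf.flush(force=True)  # nothing arrived in time; drain the buffer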
Example #4
    def run(self,
            job_id,
            speed=1.0,
            verbosity=0,
            skip_range=[],
            random_seed=0,
            final_status_delay=0,
            debug=False):
        stats = {
            'events_ontime': {
                'total': 0,
                'percentage': 0,
            },
            'events_late': {
                'total': 0,
                'percentage': 0,
                'lateness_total': 0,
                'lateness_average': 0,
            },
            'events_total': 0,
            'events_distance_total': 0,
            'events_distance_average': 0,
            'recording_start': 0,
            'recording_end': 0,
            'recording_duration': 0,
            'replay_start': 0,
            'replay_end': 0,
            'replay_duration': 0,
        }
        try:
            job = self.get_job(job_id)
            job_events, job_event_count = self.get_job_events(job)
        except RuntimeError as e:
            print("{}".format(e.message))
            sys.exit(1)

        je_previous = None

        self.emit_job_status(job, 'pending')
        self.emit_job_status(job, 'waiting')
        self.emit_job_status(job, 'running')

        finish_status_index = self.determine_job_event_finish_status_index(
            job_event_count, random_seed)

        for n, je_current in enumerate(job_events):
            if je_current.counter in skip_range:
                continue

            if debug:
                input("{} of {}:".format(n, job_event_count))

            if not je_previous:
                stats['recording_start'] = je_current.created
                self.start(je_current.created)
                stats['replay_start'] = self.replay_start
                je_previous = je_current

            emit_event_detail(je_current)

            replay_offset = self.replay_offset(je_previous.created, speed)
            delta = je_current.created - je_previous.created
            recording_diff = delta.total_seconds() * (1.0 / speed)
            stats['events_distance_total'] += recording_diff
            if verbosity >= 3:
                print(
                    "recording: next job in {} seconds".format(recording_diff))
            if replay_offset >= 0:
                replay_diff = recording_diff - replay_offset

                if replay_diff > 0:
                    stats['events_ontime']['total'] += 1
                    if verbosity >= 3:
                        print("\treplay: sleep for {} seconds".format(
                            replay_diff))
                    self.sleep(replay_diff)
                else:
                    stats['events_late']['total'] += 1
                    stats['events_late']['lateness_total'] -= replay_diff
                    if verbosity >= 3:
                        print("\treplay: too far behind to sleep {} seconds".
                              format(replay_diff))
            else:
                replay_offset = self.replay_offset(je_current.created, speed)
                stats['events_late']['lateness_total'] -= replay_offset
                stats['events_late']['total'] += 1
                if verbosity >= 3:
                    print(
                        "\treplay: behind by {} seconds".format(replay_offset))

            stats['events_total'] += 1
            je_previous = je_current

            if n == finish_status_index:
                if final_status_delay != 0:
                    self.sleep(final_status_delay)
                self.emit_job_status(job, job.status)

        if stats['events_total'] > 2:
            stats['replay_end'] = self.now()
            stats['replay_duration'] = (stats['replay_end'] -
                                        stats['replay_start']).total_seconds()
            stats['replay_start'] = stats['replay_start'].isoformat()
            stats['replay_end'] = stats['replay_end'].isoformat()

            stats['recording_end'] = je_current.created
            stats['recording_duration'] = (
                stats['recording_end'] -
                stats['recording_start']).total_seconds()
            stats['recording_start'] = stats['recording_start'].isoformat()
            stats['recording_end'] = stats['recording_end'].isoformat()

            stats['events_ontime']['percentage'] = (
                stats['events_ontime']['total'] /
                float(stats['events_total'])) * 100.00
            stats['events_late']['percentage'] = (
                stats['events_late']['total'] /
                float(stats['events_total'])) * 100.00
            stats['events_distance_average'] = stats[
                'events_distance_total'] / stats['events_total']
            stats['events_late']['lateness_average'] = stats['events_late'][
                'lateness_total'] / stats['events_late']['total']
        else:
            stats = {'events_total': stats['events_total']}

        if verbosity >= 2:
            print(json.dumps(stats, indent=4, sort_keys=True))
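
The pacing logic above amounts to one comparison per event: the recorded gap between consecutive events, scaled by 1/speed, versus how far the replay clock has already drifted. A standalone sketch expressing the same idea cumulatively (function and argument names are illustrative, not the command's API):

    import time

    def replay_delay(cur_created, recording_start, replay_wall_start, speed=1.0):
        """Seconds to sleep before emitting the next event; <= 0 means we are late.

        recording_gap: how long after recording_start the event occurred, scaled.
        replay_gap:    how long the replay has actually run on the wall clock.
        """
        recording_gap = (cur_created - recording_start).total_seconds() / speed
        replay_gap = time.time() - replay_wall_start
        return recording_gap - replay_gap

A positive result is slept off to keep the replay on the recorded timeline; a negative result is what the code above accumulates as lateness in the events_late stats.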
Example #5
File: callback.py Project: timkids/awx
 def flush(self, force=False):
     now = tz_now()
     if (force
             or (time.time() - self.last_flush) > settings.JOB_EVENT_BUFFER_SECONDS
             or any(len(events) >= 1000 for events in self.buff.values())):
         metrics_bulk_events_saved = 0
         metrics_singular_events_saved = 0
         metrics_events_batch_save_errors = 0
         metrics_events_broadcast = 0
         metrics_events_missing_created = 0
         metrics_total_job_event_processing_seconds = datetime.timedelta(
             seconds=0)
         for cls, events in self.buff.items():
             logger.debug(
                 f'{cls.__name__}.objects.bulk_create({len(events)})')
             for e in events:
                 e.modified = now  # safe to set before created, since now was captured once above
                 if not e.created:
                     e.created = now
                     metrics_events_missing_created += 1
                 else:  # only calculate the seconds if the created time already has been set
                     metrics_total_job_event_processing_seconds += e.modified - e.created
             metrics_duration_to_save = time.perf_counter()
             try:
                 cls.objects.bulk_create(events)
                 metrics_bulk_events_saved += len(events)
             except Exception:
                 # if an exception occurs, we should re-attempt to save the
                 # events one-by-one, because something in the list is
                 # broken/stale
                 metrics_events_batch_save_errors += 1
                 for e in events:
                     try:
                         e.save()
                         metrics_singular_events_saved += 1
                     except Exception:
                         logger.exception('Database Error Saving Job Event')
             metrics_duration_to_save = time.perf_counter() - metrics_duration_to_save
             for e in events:
                 if not getattr(e, '_skip_websocket_message', False):
                     metrics_events_broadcast += 1
                     emit_event_detail(e)
                 if getattr(e, '_notification_trigger_event', False):
                     job_stats_wrapup(getattr(e, e.JOB_REFERENCE), event=e)
         self.buff = {}
         self.last_flush = time.time()
         # only update metrics if we saved events
         if (metrics_bulk_events_saved + metrics_singular_events_saved) > 0:
             self.subsystem_metrics.inc(
                 'callback_receiver_batch_events_errors',
                 metrics_events_batch_save_errors)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_insert_db_seconds',
                 metrics_duration_to_save)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_insert_db',
                 metrics_bulk_events_saved + metrics_singular_events_saved)
             self.subsystem_metrics.observe(
                 'callback_receiver_batch_events_insert_db',
                 metrics_bulk_events_saved)
             self.subsystem_metrics.inc(
                 'callback_receiver_events_in_memory',
                 -(metrics_bulk_events_saved +
                   metrics_singular_events_saved))
             self.subsystem_metrics.inc(
                 'callback_receiver_events_broadcast',
                 metrics_events_broadcast)
             self.subsystem_metrics.set(
                 'callback_receiver_event_processing_avg_seconds',
                 metrics_total_job_event_processing_seconds.total_seconds()
                 /
                 (metrics_bulk_events_saved + metrics_singular_events_saved
                  - metrics_events_missing_created),
             )
         if self.subsystem_metrics.should_pipe_execute() is True:
             self.subsystem_metrics.pipe_execute()
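
One detail of the final metric is easy to miss: events whose created timestamp was backfilled at flush time are excluded from the averaging denominator, since their modified - created gap would be meaningless. A sketch of that computation in isolation; the zero-denominator guard is my addition, not in the source.

    import datetime

    def avg_event_processing_seconds(total_processing, bulk_saved, singular_saved,
                                     missing_created):
        """Average modified-minus-created seconds over events that arrived with created set."""
        denominator = bulk_saved + singular_saved - missing_created
        if denominator <= 0:
            return 0.0  # guard (my addition): every saved event had created backfilled
        return total_processing.total_seconds() / denominator

    # e.g. 12 events saved, 2 with created backfilled: average over the remaining 10
    print(avg_event_processing_seconds(datetime.timedelta(seconds=5), 10, 2, 2))  # 0.5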