class Event(models.Model):
    """Event instance which may result in one or more Alerts."""
    timestamp = models.DateTimeField(auto_now=True)
    eventid = UUIDField(default=uuid.uuid4, editable=False)
    severity = models.IntegerField(validators=[MinValueValidator(0),
                                               MaxValueValidator(100)])
    log_message = models.TextField(null=True, blank=True)
    context = PickledObjectField()
    trigger_result = PickledObjectField()

    def __unicode__(self):
        return '<Event %s/%s (%s)>' % (self.id, self.eventid, self.timestamp)

    def __repr__(self):
        return unicode(self)

    def get_details(self):
        """Return a multi-line string describing this event.

        One ``label: value`` line is emitted per attribute, followed by
        the details of every associated Alert, if there are any.
        """
        fmt = '{0:15}: {1}'
        msg = [
            fmt.format('ID', self.id),
            fmt.format('EventID', self.eventid),
            fmt.format('Severity', self.severity),
            fmt.format('Timestamp', self.timestamp),
            fmt.format('Log Message', self.log_message),
            fmt.format('Trigger Result', self.trigger_result),
            fmt.format('Context', self.context),
        ]

        # Alert.event is declared with related_name='alerts', so the
        # reverse accessor is `alerts`; the default `alert_set` name is
        # not created when a related_name is given.
        alerts = self.alerts.all()
        if alerts:
            msg.append('')
            msg.append('Associated Alerts:')
            for a in alerts:
                msg.append(a.get_details())
        return '\n'.join(msg)
class ErrorHandler(models.Model):
    """Special alert which bypasses triggers and gets immediately routed.

    The template/template_func attributes need to be provided to create
    an associated Destination object, and only one Destination can be
    defined per ErrorHandler.  One ErrorHandler should be defined for
    each desired route.
    """
    name = models.CharField(max_length=100)
    source = PickledObjectField()
    destination = models.ForeignKey('Destination')
    allow_global = models.BooleanField(default=False)

    def __unicode__(self):
        # %s instead of %d: self.id is None until the first save, and
        # '%d' % None raises TypeError.
        return '<ErrorHandler %s/%s>' % (self.id, self.name)

    @classmethod
    def create(cls, name, source, sender, options=None,
               template=None, template_func=None, allow_global=False):
        """Create new ErrorHandler and its associated Destination.

        :param str name: name stored on the new ErrorHandler
        :param source: source reference; encoded with ``Source.encode``
            unless it is already a frozenset
        :param sender: passed through to ``Destination.create``
        :param options: passed through to ``Destination.create``
        :param template: passed through to ``Destination.create``
        :param template_func: passed through to ``Destination.create``
        :param bool allow_global: stored on the new ErrorHandler
        :return: the saved ErrorHandler instance
        """
        destination = Destination.create(sender, options,
                                         template, template_func)

        # when called via Trigger classmethod source has already been encoded
        if not isinstance(source, frozenset):
            source = Source.encode(source)

        e = ErrorHandler(name=name, source=source,
                         destination=destination,
                         allow_global=allow_global)
        e.save()
        return e
class Alert(models.Model):
    """Individual notification sent by a Sender for a specific Event."""
    timestamp = models.DateTimeField(auto_now=True)
    event = models.ForeignKey('Event', related_name='alerts')
    level = models.CharField(max_length=50,
                             choices=AlertLevels.get_choices())
    sender = models.CharField(max_length=100)
    options = PickledObjectField(blank=True, null=True)
    message = models.TextField()

    def __unicode__(self):
        # Show at most the first 20 characters of the message.
        text = self.message
        preview = text if len(text) <= 20 else '%s...' % text[:20]
        return '<Alert %s (%s/%s)>' % (self.id or 'X', self.sender, preview)

    def __repr__(self):
        return unicode(self)

    def get_details(self):
        """Return a multi-line ``label: value`` summary of this alert."""
        row = '{0:15}: {1}'
        pairs = [
            ('ID', self.id),
            ('EventID', self.event.eventid),
            ('Timestamp', self.timestamp),
            ('Level', self.level),
            ('Sender', self.sender),
            ('Dest options', self.options),
            ('Message', self.message),
        ]
        return '\n'.join([row.format(label, value)
                          for label, value in pairs])
class ExistingIntervals(models.Model):
    """Time intervals already stored in the db.

    One record exists per table and set of criteria fields; the
    criteria set is represented by the table_handle field.
    """
    # Identification of the table the intervals belong to
    namespace = models.CharField(max_length=20)
    sourcefile = models.CharField(max_length=200)
    table = models.CharField(max_length=50)

    # Criteria fields and the handle that represents them
    criteria = PickledObjectField(null=True)
    table_handle = models.CharField(max_length=100, default="")

    # Stored intervals and their timezone information
    intervals = PickledObjectField(null=True)
    tzinfo = PickledObjectField()
class ExistingIntervals(models.Model):
    """Time intervals already stored in the db.

    One record exists per table and set of criteria fields; the
    criteria set is represented by the table_handle field.
    """
    # Identification of the table the intervals belong to
    namespace = models.CharField(max_length=20)
    sourcefile = models.CharField(max_length=200)
    table = models.CharField(max_length=50)

    # Criteria fields and the handle that represents them
    criteria = PickledObjectField(null=True)
    table_handle = models.CharField(max_length=100, default="")

    # Stored intervals and their timezone information
    intervals = PickledObjectField(null=True)
    tzinfo = PickledObjectField()

    def __unicode__(self):
        parts = (self.id, self.table_handle, self.intervals)
        return "<ExistingIntervals %s/%s - %s>" % parts

    def __repr__(self):
        return unicode(self)
class WidgetAuthToken(models.Model):
    """Authentication token for each user per widget per report."""
    token = models.CharField(max_length=200)
    user = models.ForeignKey(AppfwkUser)
    pre_url = models.CharField(max_length=200, verbose_name='URL')
    criteria = PickledObjectField()
    edit_fields = SeparatedValuesField(null=True)
    # auto_now: refreshed whenever the record is saved
    touched = models.DateTimeField(auto_now=True,
                                   verbose_name='Last Time used')

    def __unicode__(self):
        parts = (self.token, self.user, self.pre_url)
        return "<Token %s, User %s, pre_url %s>" % parts
class Destination(models.Model):
    """Where and how an alert is delivered.

    Stores the name of the sender class, optional sender options, and
    either a format-string ``template`` or a ``template_func`` used to
    build the message text.  At least one of the two must be set.
    """
    name = models.CharField(max_length=100)
    sender = models.CharField(max_length=100)
    options = PickledObjectField(blank=True, null=True)
    template = models.TextField(blank=True, null=True)
    template_func = FunctionField(null=True)

    def __unicode__(self):
        # %s instead of %d: self.id is None before the first save, and
        # save() formats `self` into its error message at that point.
        if self.options:
            return '<Destination %s/%s -> %s>' % (self.id,
                                                  self.sender,
                                                  str(self.options))
        return '<Destination %s/%s>' % (self.id, self.sender)

    def save(self, *args, **kwargs):
        """Save the destination, forwarding all arguments to Django.

        :raises AttributeError: if neither template nor template_func
            is defined
        """
        if self.template is None and self.template_func is None:
            msg = ('Missing template or template_func definition in '
                   'Destination creation for Destination %s' % self)
            raise AttributeError(msg)
        # Forward args/kwargs so options such as `using` are honored.
        super(Destination, self).save(*args, **kwargs)

    @classmethod
    def create(cls, sender, options=None, template=None, template_func=None):
        """Create, save and return a new Destination."""
        r = Destination(sender=sender, options=options,
                        template=template, template_func=template_func)
        r.save()
        return r

    def get_sender(self):
        """Return instance of Sender associated with the model."""
        return find_sender(self.sender)()

    def get_message(self, context):
        """Return string from either template_func or template
        processed with result and given context.

        :param dict context: values expanded as keyword arguments into
            the template function or the format template
        :return: the message string, or None when the template function
            raised (the error is logged)
        """
        logger.debug('XXX here - get_message context keys/template/func: '
                     '%s/%s/%s' % (context.keys(),
                                   self.template,
                                   self.template_func))
        if self.template_func:
            try:
                return self.template_func(self, **context)
            except Exception as e:
                # Best effort: a failing template function is logged
                # and yields no message.
                logger.error('Error processing template function: %s' % e)
                return None
        return self.template.format(**context)
class Trigger(models.Model):
    """Associates a source with a trigger function and destinations.

    Saving or deleting a Trigger clears the TriggerCache.
    """
    name = models.CharField(max_length=100)
    source = PickledObjectField()
    trigger_func = FunctionField()
    destinations = models.ManyToManyField('Destination')

    def save(self, *args, **kwargs):
        """Save the trigger, generating a name when none is set."""
        if not self.name:
            # hash() returns an int; format it rather than using `+`,
            # which raises TypeError for str + int.
            self.name = 'trigger_%s' % hash(self.source)
        super(Trigger, self).save(*args, **kwargs)
        TriggerCache.clear()

    def delete(self, *args, **kwargs):
        """Clear the trigger cache, then delete the trigger."""
        TriggerCache.clear()
        super(Trigger, self).delete(*args, **kwargs)

    @classmethod
    def create(cls, source, trigger_func, params=None, **kwargs):
        """Create trigger against given source table.

        :param table source: Table object reference
        :param function trigger_func: function object to run for trigger
        :param dict params: optional additional parameters to pass to
            trigger_func
        :param kwargs: extra model attributes; ``name`` overrides the
            default of ``Source.name(source)``
        :return: the saved Trigger instance
        """
        tfunc = Function(trigger_func, params=params)
        t = Trigger(name=kwargs.pop('name', Source.name(source)),
                    source=Source.encode(source),
                    trigger_func=tfunc,
                    **kwargs)
        t.save()
        return t

    def add_destination(self, sender, options=None,
                        template=None, template_func=None):
        """Assign destination to the given Trigger.

        :param str sender: name of sender class to use
        :param dict options: optional dictionary of attributes
        :param str template: format string to use for resulting alert
        :param function template_func: optional function which returns a
            formatted string, receives same context as template
        """
        r = Destination.create(sender=sender, options=options,
                               template=template,
                               template_func=template_func)
        self.destinations.add(r)

    def add_error_handler(self, sender, options=None, template=None,
                          template_func=None, allow_global=False):
        """Convenience method to create error handler for same source."""
        e = ErrorHandler.create(name=self.name + 'ErrorHandler',
                                source=self.source,
                                sender=sender,
                                options=options,
                                template=template,
                                template_func=template_func,
                                allow_global=allow_global)
        return e
class Widget(models.Model):
    """Defines a UI widget and the source datatables."""
    tables = models.ManyToManyField(Table)
    section = models.ForeignKey(Section)
    title = models.CharField(max_length=100)
    row = models.IntegerField()
    col = models.IntegerField()
    width = models.IntegerField(default=1)
    height = models.IntegerField(default=300)
    rows = models.IntegerField(default=-1)
    options = PickledObjectField()

    module = models.CharField(max_length=100)
    uiwidget = models.CharField(max_length=100)
    uioptions = PickledObjectField()

    # not globally unique, but should be sufficiently unique within a report
    slug = models.SlugField(max_length=100)

    objects = InheritanceManager()

    def __repr__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def __unicode__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def save(self, *args, **kwargs):
        """Rebuild the slug from title/row/col, then save."""
        slug_parts = (slugify(self.title), self.row, self.col)
        self.slug = '%s-%d-%d' % slug_parts
        super(Widget, self).save(*args, **kwargs)

    def get_definition(self, criteria):
        """Get dict of widget attributes for sending via JSON."""
        report = self.section.report
        url_args = (report.namespace, report.slug, self.slug)

        return {
            "widgettype": self.widgettype().split("."),
            "posturl": reverse('widget-job-list', args=url_args),
            "updateurl": reverse('widget-criteria', args=url_args),
            "options": self.uioptions,
            "widgetid": self.id,
            "widgetslug": self.slug,
            "row": self.row,
            "width": self.width,
            "height": self.height,
            "criteria": criteria,
        }

    def widgettype(self):
        """Return '<last module component>.<uiwidget>'."""
        module_tail = self.module.split('.')[-1]
        return '%s.%s' % (module_tail, self.uiwidget)

    def table(self, i=0):
        """Return the i-th associated table."""
        all_tables = self.tables.all()
        return all_tables[i]

    def compute_row_col(self):
        """Set self.row / self.col to the next free slot in the report.

        The widget is placed after the widgets of the highest existing
        row, or on a new row when the combined width would exceed 12.
        """
        report_widgets = self.section.report.widgets

        row = report_widgets().aggregate(Max('row'))['row__max']
        if row is None:
            # No widgets yet: start at the top-left position.
            row, col = 1, 1
        else:
            used = (report_widgets()
                    .filter(row=row)
                    .aggregate(Sum('width')))['width__sum']
            if used + self.width > 12:
                row, col = row + 1, 1
            else:
                col = used + 1

        self.row = row
        self.col = col

    def collect_fields(self):
        """Return an ordered keyword -> field map for this widget.

        Report fields come first, then section fields, then the fields
        of every table of every widget in the section; a keyword that
        was already collected from an earlier level is not replaced.
        """
        collected = SortedDict()
        section = self.section

        # All fields attached to the section's report
        for field in section.report.fields.all().order_by('id'):
            collected[field.keyword] = field

        # All fields attached to the section
        for field in section.fields.all().order_by('id'):
            if field.keyword not in collected:
                collected[field.keyword] = field

        # All fields attached to any Widget's Tables
        for widget in section.widget_set.all().order_by('id'):
            for tbl in widget.tables.all():
                for field in tbl.fields.all().order_by('id'):
                    if field.keyword not in collected:
                        collected[field.keyword] = field

        return collected
class Widget(models.Model):
    """Defines a UI widget and the source datatables."""
    tables = models.ManyToManyField(Table)
    section = models.ForeignKey(Section)
    title = models.CharField(max_length=100)
    row = models.IntegerField()
    col = models.IntegerField()
    width = models.IntegerField(default=6)

    # setting height of 0 will let widget box auto-size to resulting data
    height = models.IntegerField(default=300)
    rows = models.IntegerField(default=-1)
    options = PickledObjectField()

    module = models.CharField(max_length=100)
    uiwidget = models.CharField(max_length=100)
    uioptions = PickledObjectField()

    # not globally unique, but should be sufficiently unique within a report
    slug = models.SlugField(max_length=100)

    # widget to be stacked below the previous widget on the same row
    stack_widget = models.BooleanField(default=False)

    objects = InheritanceManager()

    def __repr__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def __unicode__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def save(self, *args, **kwargs):
        """Rebuild the slug from title/row/col, then save."""
        slug_parts = (slugify(self.title), self.row, self.col)
        self.slug = '%s-%d-%d' % slug_parts
        super(Widget, self).save(*args, **kwargs)

    @classmethod
    def create(cls, *args, **kwargs):
        """Create, position and save a Widget.

        The ``options`` and ``table`` keyword arguments are removed
        before the remaining arguments are passed to the Widget
        constructor; ``options`` is wrapped in a JsonDict and ``table``
        is added to the widget's tables after saving.
        """
        widget_options = kwargs.pop('options', None)
        source_table = kwargs.pop('table', None)

        widget = Widget(*args, **kwargs)
        widget.compute_row_col()

        if widget_options:
            widget.options = JsonDict(widget_options)

        widget.save()

        if source_table:
            widget.tables.add(source_table)

        return widget

    def get_definition(self, criteria):
        """Get dict of widget attributes for sending via JSON."""
        report = self.section.report
        url_args = (report.namespace, report.slug, self.slug)

        return {
            "widgettype": self.widgettype().split("."),
            "posturl": reverse('widget-job-list', args=url_args),
            "updateurl": reverse('widget-criteria', args=url_args),
            "options": self.uioptions,
            "widgetid": self.id,
            "widgetslug": self.slug,
            "row": self.row,
            "width": self.width,
            "height": self.height,
            "criteria": criteria,
        }

    def widgettype(self):
        """Return '<last module component>.<uiwidget>'."""
        module_tail = self.module.split('.')[-1]
        return '%s.%s' % (module_tail, self.uiwidget)

    def table(self, i=0):
        """Return the i-th associated table."""
        all_tables = self.tables.all()
        return all_tables[i]

    def compute_row_col(self):
        """Set self.row / self.col for this widget within its report.

        A stacked widget takes the row/col of the last widget (ordered
        by row, then col); otherwise the widget goes after the widgets
        of the highest existing row, or on a new row when the combined
        width would exceed 12.

        :raises ValueError: for a stacked widget whose width differs
            from, or whose title matches, the previous widget
        """
        report_widgets = self.section.report.widgets

        row = report_widgets().aggregate(Max('row'))['row__max']
        if row is None:
            # No widgets yet: start at the top-left position.
            row, col = 1, 1
        elif self.stack_widget:
            # This widget needs to be stacked below the previous widget
            previous = report_widgets().order_by('-row', '-col')[0]
            if previous.width != self.width:
                raise ValueError("The stack widget with title '%s' should set "
                                 "with width %s." %
                                 (self.title, previous.width))
            if previous.title.lower() == self.title.lower():
                raise ValueError("The stack widget title '%s' is the same as "
                                 "the previous widget, thus should be "
                                 "changed." % self.title)
            row, col = previous.row, previous.col
        else:
            used = (report_widgets()
                    .filter(row=row)
                    .aggregate(Sum('width')))['width__sum']
            if used + self.width > 12:
                row, col = row + 1, 1
            else:
                col = used + 1

        self.row = row
        self.col = col

    def collect_fields(self):
        """Return an ordered keyword -> field map for this widget.

        Report fields come first, then section fields, then the fields
        of every table of every widget in the section; a keyword that
        was already collected from an earlier level is not replaced.
        """
        collected = OrderedDict()
        section = self.section

        # All fields attached to the section's report
        for field in section.report.fields.all().order_by('id'):
            collected[field.keyword] = field

        # All fields attached to the section
        for field in section.fields.all().order_by('id'):
            if field.keyword not in collected:
                collected[field.keyword] = field

        # All fields attached to any Widget's Tables
        for widget in section.widget_set.all().order_by('id'):
            for tbl in widget.tables.all():
                for field in tbl.fields.all().order_by('id'):
                    if field.keyword not in collected:
                        collected[field.keyword] = field

        return collected
class ReportHistory(models.Model):
    """ Define a record history of running report."""
    namespace = models.CharField(max_length=50)
    slug = models.CharField(max_length=50)
    bookmark = models.CharField(max_length=400)
    first_run = models.DateTimeField()
    last_run = models.DateTimeField()
    job_handles = models.TextField()
    user = models.CharField(max_length=50)
    criteria = PickledObjectField()
    run_count = models.IntegerField()

    status_choices = ((ReportStatus.NEW, "New"),
                      (ReportStatus.RUNNING, "Running"),
                      (ReportStatus.COMPLETE, "Complete"),
                      (ReportStatus.ERROR, "Error"))

    status = models.IntegerField(default=ReportStatus.NEW,
                                 choices=status_choices)

    @classmethod
    def create(cls, **kwargs):
        """ Create a new report history object and save it to database.

        If a record with the same ``job_handles`` already exists, that
        record is reset to NEW, its ``last_run`` is updated and its
        ``run_count`` incremented instead of creating a new one.

        :param str namespace: name of one set of report slugs
        :param str slug: the slug of the report
        :param str bookmark: the bookmark link of the report
        :param datetime first_run: Time when the report with the same
            criteria ran at the first time
        :param datetime last_run: Time when the report with the same
            criteria ran last time
        :param str job_handles: comma separated job handle strings of
            the report
        :param str user: name of the user who ran the report
        :param dict criteria: criteria fields that the report is running
            with
        :param int run_count: the number of times the report has run
            with the same criteria
        :return: the created or updated report history object, or None
            if recording the history failed (the failure is logged)
        """
        import logging

        job_handles = kwargs.get('job_handles')
        try:
            try:
                rh_obj = cls.objects.get(job_handles=job_handles)
            except ObjectDoesNotExist:
                rh_obj = cls(**kwargs)
                rh_obj.save()
            else:
                with TransactionLock(rh_obj, '%s_create' % rh_obj):
                    rh_obj.status = ReportStatus.NEW
                    rh_obj.last_run = kwargs.get('last_run')
                    rh_obj.run_count += 1
                    rh_obj.save()
        except Exception:
            # The previous `finally: return` discarded every error
            # silently.  Keep history recording best-effort, but leave
            # a trace of what went wrong.
            logging.getLogger(__name__).exception(
                'Failed to record report history for job handles %s',
                job_handles)
            return None
        return rh_obj

    def __unicode__(self):
        return ("<Report History %s %s/%s>"
                % (self.id, self.namespace, self.slug))

    def __repr__(self):
        return unicode(self)

    def update_status(self, status):
        """Store a new status; no-op when the status is unchanged."""
        if self.status != status:
            with TransactionLock(self, '%s.update_status' % self):
                self.status = status
                self.save()

    def format_ts(self, ts):
        """Format timestamp `ts` in local time as YYYY/MM/DD HH:MM:SS."""
        ltime = timezone.localtime(ts)
        return ltime.strftime("%Y/%m/%d %H:%M:%S")

    @property
    def format_last_run(self):
        return self.format_ts(self.last_run)

    @property
    def format_first_run(self):
        return self.format_ts(self.first_run)

    @property
    def status_name(self):
        """Display name of the current status."""
        # Look the label up by value instead of using the status value
        # as a positional index into the choices tuple.
        return dict(self.status_choices)[self.status]

    @property
    def criteria_html(self):
        """HTML table with one row per criteria key, sorted by key."""
        # NOTE(review): keys and values are inserted into the markup
        # unescaped -- confirm criteria can never carry untrusted HTML.
        tr_line = '<tr><td><b>{0}</b>: </td><td>{1}</td></tr>'
        rows = [tr_line.format(k, self.criteria[k])
                for k in sorted(self.criteria.keys())]
        return '<table>' + ''.join(rows) + '</table>'
class Column(models.Model):
    """A single column of a Table.

    Holds the column's name/label, ordering position, datatype, units
    and the attributes used for synthetic (computed) columns.
    """
    table = models.ForeignKey(Table)
    name = models.CharField(max_length=300)
    label = models.CharField(max_length=300, null=True)
    position = models.IntegerField(default=1)
    options = PickledObjectField()

    iskey = models.BooleanField(default=False)

    synthetic = models.BooleanField(default=False)

    # Ephemeral columns are columns added to a table at run-time
    ephemeral = models.ForeignKey('jobs.Job', null=True)

    compute_post_resample = models.BooleanField(default=False)
    compute_expression = models.CharField(max_length=300)
    resample_operation = models.CharField(max_length=300, default='sum')

    DATATYPE_FLOAT = 0
    DATATYPE_INTEGER = 1
    DATATYPE_TIME = 2
    DATATYPE_STRING = 3
    DATATYPE_HTML = 4
    DATATYPE_DATE = 5
    DATATYPE_INTEGER64 = 6

    datatype = models.IntegerField(
        default=DATATYPE_FLOAT,
        choices=((DATATYPE_FLOAT, "float"),
                 (DATATYPE_INTEGER, "integer"),
                 (DATATYPE_TIME, "time"),
                 (DATATYPE_STRING, "string"),
                 (DATATYPE_HTML, "html"),
                 (DATATYPE_DATE, "date"),
                 (DATATYPE_INTEGER64, "integer64")))

    UNITS_NONE = 0
    UNITS_SECS = 1
    UNITS_MSECS = 2
    UNITS_BYTES = 3
    UNITS_BYTES_PER_SEC = 4
    UNITS_PCT = 5
    UNITS_BITS = 6
    UNITS_BITS_PER_SEC = 7
    UNITS_SECS_VERBOSE = 8
    UNITS_MSECS_VERBOSE = 9
    UNITS_BYTES_VERBOSE = 10
    UNITS_BYTES_PER_SEC_VERBOSE = 11
    UNITS_PCT_VERBOSE = 12
    UNITS_BITS_VERBOSE = 13
    UNITS_BITS_PER_SEC_VERBOSE = 14

    units = models.IntegerField(
        default=UNITS_NONE,
        choices=((UNITS_NONE, "none"),
                 (UNITS_SECS, "s"),
                 (UNITS_MSECS, "ms"),
                 (UNITS_BYTES, "B"),
                 (UNITS_BYTES_PER_SEC, "B/s"),
                 (UNITS_PCT, "pct"),
                 (UNITS_BITS, "b"),
                 (UNITS_BITS_PER_SEC, "b/s"),
                 (UNITS_SECS_VERBOSE, "seconds"),
                 (UNITS_MSECS_VERBOSE, "milliseconds"),
                 (UNITS_BYTES_VERBOSE, "bytes"),
                 (UNITS_BYTES_PER_SEC_VERBOSE, "bytes/second"),
                 (UNITS_PCT_VERBOSE, "percent"),
                 (UNITS_BITS_VERBOSE, "bits"),
                 (UNITS_BITS_PER_SEC_VERBOSE, "bits/second")))

    formatter = models.TextField(null=True, blank=True)

    # default options to populate options field
    COLUMN_OPTIONS = {}
    POS_MAX = 0

    def __unicode__(self):
        return "<Column %s (%s)>" % (str(self.id), self.name)

    def __repr__(self):
        return unicode(self)

    def save(self, *args, **kwargs):
        """Save the column, defaulting the label to the column name."""
        if self.label is None:
            self.label = self.name
        # Forward args/kwargs so options such as `using` are honored.
        super(Column, self).save(*args, **kwargs)

    @classmethod
    def create(cls, table, name, label=None,
               datatype=DATATYPE_FLOAT, units=UNITS_NONE,
               iskey=False, position=None, **kwargs):
        """Create, save and return a new Column for `table`.

        Keyword arguments matching a key of ``cls.COLUMN_OPTIONS`` are
        stored in the column's options; keyword arguments matching a
        Column model field are passed to the constructor.

        :param table: table the column belongs to
        :param str name: column name; must not already be in use for
            the same table and ephemeral job
        :param str label: display label; the model's save() falls back
            to `name` when this is None
        :param datatype: datatype choice, checked via check_field_choice
        :param units: units choice, checked via check_field_choice
        :param bool iskey: value for the column's iskey field
        :param int position: position of the column; defaults to the id
            allocated on first save
        :raises AttributeError: for keyword arguments that are neither
            column options nor Column fields
        :raises ValueError: if the column name is already in use
        :raises DatabaseError: if saving fails
        """
        column_options = copy.deepcopy(cls.COLUMN_OPTIONS)

        # Snapshot the keys: kwargs is modified while being scanned.
        keys = list(kwargs.keys())
        cp = dict((k, kwargs.pop(k)) for k in keys if k in column_options)
        column_options.update(**cp)

        if column_options:
            options = JsonDict(default=column_options)
        else:
            options = None

        keys = list(kwargs.keys())
        ckeys = [f.name for f in Column._meta.local_fields]
        col_kwargs = dict((k, kwargs.pop(k)) for k in keys if k in ckeys)

        if kwargs:
            raise AttributeError('Invalid keyword arguments: %s' %
                                 str(kwargs))

        ephemeral = col_kwargs.get('ephemeral', None)
        # exists() avoids loading the matching rows just to count them.
        if Column.objects.filter(table=table, name=name,
                                 ephemeral=ephemeral).exists():
            raise ValueError("Column %s already in use for table %s" %
                             (name, str(table)))

        datatype = check_field_choice(cls, 'datatype', datatype)
        units = check_field_choice(cls, 'units', units)

        c = Column(table=table, name=name, label=label, datatype=datatype,
                   units=units, iskey=iskey, options=options, **col_kwargs)

        try:
            c.save()
        except DatabaseError as e:
            if 'no such table' in str(e):
                msg = str(e) + ' -- did you forget class Meta: proxy=True?'
                raise DatabaseError(msg)
            raise

        # The first save allocates the id used as the default position.
        c.position = position or c.id
        c.save()

        return c

    def isnumeric(self):
        return self.datatype in (self.DATATYPE_FLOAT,
                                 self.DATATYPE_INTEGER,
                                 self.DATATYPE_INTEGER64)

    def istime(self):
        return self.datatype == self.DATATYPE_TIME

    def isdate(self):
        return self.datatype == self.DATATYPE_DATE

    def isstring(self):
        return self.datatype == self.DATATYPE_STRING

    def units_str(self):
        """Return the units label, or None for UNITS_NONE."""
        if self.units == self.UNITS_NONE:
            return None
        return field_choice_str(self, 'units', self.units)
class TableField(models.Model):
    """ Defines a single field associated with a table.

    TableFields define the parameters that are used by a Table
    at run time.  The Table.fields attribute associates one
    or more fields with the table.

    At run time, a Criteria object binds values to each field.  The
    Criteria object has an attribute matching each associated TableField
    keyword.

    When defining a TableField, the following model attributes
    may be specified:

    :param keyword: short identifier used like a variable name, this must
        be unique per table

    :param label: text label displayed in user interfaces

    :param help_text: descriptive help text associated with this field

    :param initial: starting or default value to use in user interfaces

    :param required: boolean indicating if a non-null value must be
        provided

    :param hidden: boolean indicating if this field should be hidden in
        user interfaces, usually true when the value is computed from
        other fields via post_process_func or post_process_template

    :param field_cls: Django Form Field class to use for rendering.
        If not specified, this defaults to CharField

    :param field_kwargs: Dictionary of additional field specific
        kwargs to pass to the field_cls constructor.

    :param parent_keywords: List of parent keywords that this field
        depends on for a final value.  Used in conjunction with either
        post_process_func or post_process_template.

    :param pre_process_func: Function to call to perform any necessary
        preprocessing before rendering a form field or accepting
        user input.

    :param post_process_func: Function to call to perform any post submit
        processing.  This may be additional value cleanup or computation
        based on other form data.

    :param post_process_template: Simple string format style template
        to fill in based on other form criteria.

    """
    keyword = models.CharField(max_length=100)
    label = models.CharField(max_length=100, null=True, default=None)
    help_text = models.CharField(blank=True, null=True, default=None,
                                 max_length=400)
    initial = PickledObjectField(blank=True, null=True)
    required = models.BooleanField(default=False)
    hidden = models.BooleanField(default=False)

    field_cls = PickledObjectField(null=True)
    field_kwargs = PickledObjectField(blank=True, null=True)

    parent_keywords = SeparatedValuesField(null=True)

    pre_process_func = FunctionField(null=True)
    dynamic = models.BooleanField(default=False)
    post_process_func = FunctionField(null=True)
    post_process_template = models.CharField(null=True, max_length=500)

    @classmethod
    def create(cls, keyword, label=None, obj=None, **kwargs):
        """Create, save and return a new field.

        Every field name referenced by ``post_process_template`` is
        appended to the field's parent keywords.

        :param str keyword: keyword of the new field
        :param str label: label of the new field
        :param obj: optional object whose ``fields`` relation the new
            field is added to
        :param kwargs: additional model attributes; ``parent_keywords``
            is taken as the initial list of parent keywords
        """
        parent_keywords = kwargs.pop('parent_keywords', None)
        if parent_keywords is None:
            parent_keywords = []
        else:
            # Work on a copy so the caller's list is not extended.
            parent_keywords = list(parent_keywords)

        field = cls(keyword=keyword, label=label, **kwargs)
        field.save()

        if field.post_process_template is not None:
            f = string.Formatter()
            for (_, parent_keyword, _, _) in f.parse(
                    field.post_process_template):
                if parent_keyword is not None:
                    parent_keywords.append(parent_keyword)

        field.parent_keywords = parent_keywords
        field.save()

        if obj is not None:
            obj.fields.add(field)
        return field

    def __unicode__(self):
        return "<TableField %s (%s)>" % (self.keyword, self.id)

    def __repr__(self):
        return unicode(self)

    def is_report_criteria(self, table):
        """ Runs through intersections of widgets to determine if this
            criteria is applicable to the passed table

            report  <-->  widgets  <-->  table
                |
                L- TableField (self)
        """
        wset = set(table.widget_set.all())
        rset = set(self.report_set.all())
        return any(
            wset.intersection(set(rwset.widget_set.all()))
            for rwset in rset)

    @classmethod
    def find_instance(cls, key):
        """ Return instance given a keyword.

        :return: the matching TableField, or None if there is none
        :raises KeyError: if more than one field has the keyword
        """
        params = TableField.objects.filter(keyword=key)
        if len(params) == 0:
            return None
        elif len(params) > 1:
            raise KeyError("Multiple TableField matches found for %s"
                           % key)
        param = params[0]
        return param
class Table(models.Model):
    """A data table produced by a query class defined in a module."""
    name = models.CharField(max_length=200)

    # Table data is produced by a queryclassname defined within the
    # named module
    module = models.CharField(max_length=200)
    queryclassname = models.CharField(max_length=200)

    namespace = models.CharField(max_length=100)
    sourcefile = models.CharField(max_length=200)

    # list of column names
    sortcols = SeparatedValuesField(null=True)

    # list of asc/desc - must match len of sortcols
    sortdir = SeparatedValuesField(null=True)
    # Valid values for sort kwarg
    SORT_NONE = None
    SORT_ASC = 'asc'
    SORT_DESC = 'desc'

    rows = models.IntegerField(default=-1)
    filterexpr = models.CharField(null=True, max_length=400)

    # resample flag -- resample to the criteria.resolution
    # - this requires a "time" column
    resample = models.BooleanField(default=False)

    # options are typically fixed attributes defined at Table creation
    options = PickledObjectField()

    # list of fields that must be bound to values in criteria
    # that this table needs to run
    fields = models.ManyToManyField(TableField)

    # Default values for fields associated with this table, these
    # may be overridden by user criteria at run time
    criteria = PickledObjectField()

    # Function to call to tweak criteria for computing a job handle.
    # This must return a dictionary of key/value pairs of values
    # to use for determining when a job must be rerun.
    criteria_handle_func = FunctionField(null=True)

    # Indicates if data can be cached
    cacheable = models.BooleanField(default=True)

    @classmethod
    def to_ref(cls, arg):
        """ Generate a table reference.

        :param arg: may be either a Table object, an object with a
            ``table`` attribute, a table id, or a dictionary reference.
        :return: dict with 'sourcefile', 'namespace' and 'name' keys; a
            dict argument is returned unchanged
        :raises KeyError: if a dict argument lacks namespace or name
        :raises ValueError: if the argument type is not supported
        """
        if isinstance(arg, dict):
            if 'namespace' not in arg or 'name' not in arg:
                msg = 'Invalid table ref as dict, expected namespace/name'
                raise KeyError(msg)
            return arg

        if isinstance(arg, Table):
            table = arg
        elif hasattr(arg, 'table'):
            # Datasource table
            table = arg.table
        elif isinstance(arg, int):
            table = Table.objects.get(id=arg)
        else:
            raise ValueError('No way to handle Table arg of type %s'
                             % type(arg))
        return {'sourcefile': table.sourcefile,
                'namespace': table.namespace,
                'name': table.name}

    @classmethod
    def from_ref(cls, ref):
        """Return the Table identified by a reference dict.

        :param dict ref: reference with 'sourcefile', 'namespace' and
            'name' keys
        :raises ObjectDoesNotExist: if no such table exists (the
            failure is logged before re-raising)
        """
        try:
            return Table.objects.get(sourcefile=ref['sourcefile'],
                                     namespace=ref['namespace'],
                                     name=ref['name'])
        except ObjectDoesNotExist:
            logger.exception(
                'Failed to resolve table ref: %s/%s/%s' %
                (ref['sourcefile'], ref['namespace'], ref['name']))
            raise

    def __unicode__(self):
        return "<Table %s (%s)>" % (str(self.id), self.name)

    def __repr__(self):
        return unicode(self)

    @property
    def queryclass(self):
        """The class named `queryclassname` in module `module`.

        :raises DatasourceException: if the module cannot be imported
            or does not define the class
        """
        # Lookup the query class for the table associated with this task
        try:
            i = importlib.import_module(self.module)
            queryclass = i.__dict__[self.queryclassname]
        except Exception:
            # `Exception` rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not converted.
            raise DatasourceException(
                "Could not lookup queryclass %s in module %s" %
                (self.queryclassname, self.module))

        return queryclass

    def get_columns(self, synthetic=None, ephemeral=None, iskey=None):
        """ Return the list of columns for this table.

        `synthetic` is tri-state: None (default) is don't care,
            True means only synthetic columns, False means
            only non-synthetic columns

        `ephemeral` is a job reference.  If specified, include
            ephemeral columns related to this job

        `iskey` is tri-state: None (default) is don't care,
            True means only key columns, False means
            only non-key columns

        """
        filtered = []
        for c in Column.objects.filter(table=self).order_by('position',
                                                            'name'):
            if synthetic is not None and c.synthetic != synthetic:
                continue
            # Ephemeral columns are only included for their own job.
            if c.ephemeral is not None and c.ephemeral != ephemeral:
                continue
            if iskey is not None and c.iskey != iskey:
                continue
            filtered.append(c)

        return filtered

    def copy_columns(self, table, columns=None, except_columns=None,
                     synthetic=None, ephemeral=None):
        """ Copy the columns from `table` into this table.

        This method will copy all the columns from another table, including
        all attributes as well as sorting.

        :param table: source Table, or a table reference resolved with
            ``Table.from_ref``
        :param columns: if given, only columns with these names are
            copied
        :param except_columns: if given, columns with these names are
            skipped
        :param synthetic: passed to the source table's get_columns()
        :param ephemeral: passed to the source table's get_columns()
        """
        if not isinstance(table, Table):
            table = Table.from_ref(table)

        sortcols = []
        sortdir = []
        for c in table.get_columns(synthetic=synthetic, ephemeral=ephemeral):
            if columns is not None and c.name not in columns:
                continue
            if except_columns is not None and c.name in except_columns:
                continue

            if table.sortcols and (c.name in table.sortcols):
                sortcols.append(c.name)
                sortdir.append(table.sortdir[table.sortcols.index(c.name)])

            # Clearing the pk makes save() insert a new row.
            c.pk = None
            c.table = self

            c.save()

            # Allocate an id, use that as the position
            c.position = c.id
            c.save()

        if sortcols:
            self.sortcols = sortcols
            self.sortdir = sortdir
            self.save()

    def compute_synthetic(self, job, df):
        """ Compute the synthetic columns from DF a two-dimensional array
            of the non-synthetic columns.

            Synthesis occurs as follows:

            1. Compute all synthetic columns where compute_post_resample
               is False

            2. If the table is a time-based table with a defined resolution,
               the result is resampled.

            3. Any remaining columns are computed.

            :return: the resulting data frame, or None if `df` is None
            :raises ValueError: if a compute expression has invalid
                ``{name}`` syntax or names an unknown variable
            :raises TableComputeSyntheticError: if an expression fails
                with a NameError, or resampling is misconfigured
        """
        if df is None:
            return None

        all_columns = job.get_columns()
        all_col_names = [c.name for c in all_columns]

        def compute(df, syncols):
            # Each `{name}` in an expression is rewritten to either a
            # data frame column lookup or a quoted criteria value; all
            # other tokens are copied through unchanged.
            for syncol in syncols:
                expr = syncol.compute_expression
                g = tokenize.generate_tokens(StringIO(expr).readline)
                newexpr = ""
                getvalue = False
                getclose = False
                for ttype, tvalue, _, _, _ in g:
                    if getvalue:
                        if ttype != tokenize.NAME:
                            msg = ("Invalid syntax, expected {name}: %s"
                                   % tvalue)
                            raise ValueError(msg)
                        elif tvalue in all_col_names:
                            newexpr += "df['%s']" % tvalue
                        elif tvalue in job.criteria:
                            newexpr += '"%s"' % str(job.criteria.get(tvalue))
                        else:
                            raise ValueError("Invalid variable name: %s"
                                             % tvalue)

                        getclose = True
                        getvalue = False
                    elif getclose:
                        # Only the operator token "}" may follow the
                        # name; `or` (not `and`) so that any other
                        # operator token is rejected as well.
                        if ttype != tokenize.OP or tvalue != "}":
                            msg = ("Invalid syntax, expected {name}: %s"
                                   % tvalue)
                            raise ValueError(msg)
                        getclose = False
                    elif ttype == tokenize.OP and tvalue == "{":
                        getvalue = True
                    else:
                        newexpr += tvalue
                    newexpr += ' '
                try:
                    # NOTE(review): eval() runs the column's
                    # compute_expression as Python code -- confirm that
                    # expressions only come from trusted definitions.
                    df[syncol.name] = eval(newexpr)
                except NameError as e:
                    m = (('%s: expression failed: %s, check '
                          'APPFWK_SYNTHETIC_MODULES: %s') %
                         (self, newexpr, str(e)))
                    logger.exception(m)
                    raise TableComputeSyntheticError(m)

        # 1. Compute synthetic columns where post_resample is False
        compute(df, [col for col in all_columns if
                     (col.synthetic and col.compute_post_resample is False)])

        # 2. Resample
        colmap = {}
        timecol = None
        for col in all_columns:
            colmap[col.name] = col
            if col.istime():
                timecol = col.name

        if self.resample:
            if timecol is None:
                raise TableComputeSyntheticError(
                    "%s: 'resample' is set but no 'time' column'" % self)

            if (('resolution' not in job.criteria) and
                    ('resample_resolution' not in job.criteria)):
                raise TableComputeSyntheticError(
                    ("%s: 'resample' is set but criteria missing " +
                     "'resolution' or 'resample_resolution'") % self)

            # Per-column resample operation for every data column
            how = {}
            for k in df.keys():
                if k == timecol or k not in colmap:
                    continue

                how[k] = colmap[k].resample_operation

            if 'resample_resolution' in job.criteria:
                resolution = job.criteria.resample_resolution
            else:
                resolution = job.criteria.resolution

            resolution = timedelta_total_seconds(resolution)
            if resolution < 1:
                raise TableComputeSyntheticError(
                    ("Table %s cannot resample at a resolution " +
                     "less than 1 second") % self)

            logger.debug('%s: resampling to %ss' % (self, int(resolution)))

            indexed = df.set_index(timecol)

            resampled = indexed.resample('%ss' % int(resolution), how,
                                         convention='end').reset_index()
            df = resampled

        # 3. Compute remaining synthetic columns (post_resample is True)
        compute(df, [c for c in all_columns
                     if (c.synthetic and c.compute_post_resample is True)])

        return df
class Job(models.Model):
    """A single run of a Table against a specific set of criteria.

    A job may be linked to a ``parent`` job (the job that spawned it and
    is waiting on its results) and/or to a ``master`` job (another job
    with the same handle whose results this "follower" job shares).
    Status and progress are additionally mirrored to ``progressd``.
    """

    # Timestamp when the job was created
    created = models.DateTimeField(auto_now_add=True)

    # Timestamp the last time the job was accessed
    touched = models.DateTimeField(auto_now_add=True)

    # Number of references to this job
    refcount = models.IntegerField(default=0)

    # Parent job that spawned this job (and is thus waiting for
    # this job's results)
    parent = models.ForeignKey('self', null=True, related_name='children')

    # Master job that has run (or is running) that has the same
    # criteria.  If master is set, this job is a "follower"
    master = models.ForeignKey('self', null=True, related_name='followers')

    # Table associated with this job
    table = models.ForeignKey(Table)

    # Criteria used to start this job - an instance of the Criteria class
    criteria = PickledObjectField(null=True)

    # Actual criteria as returned by the job after running
    actual_criteria = PickledObjectField(null=True)

    # Unique handle for the job
    handle = models.CharField(max_length=100, default="")

    # Job status
    NEW = 0
    QUEUED = 1
    RUNNING = 2
    COMPLETE = 3
    ERROR = 4

    status = models.IntegerField(
        default=NEW,
        choices=((NEW, "New"),
                 (QUEUED, "Queued"),
                 (RUNNING, "Running"),
                 (COMPLETE, "Complete"),
                 (ERROR, "Error")))

    # Process ID for original Task thread
    pid = models.IntegerField(default=None, null=True)

    # Message if job complete or error
    message = models.TextField(default="")

    # If an error comes from a Python exception, this will contain the full
    # exception text with traceback.
    exception = models.TextField(default="")

    # Whether to update detailed progress
    update_progress = models.BooleanField(default=True)

    # Callback function
    callback = CallableField()

    # Manager class for additional .objects methods
    objects = JobManager()

    def __unicode__(self):
        return "<Job %s (%8.8s) - t%s>" % (self.id, self.handle,
                                           self.table.id)

    def __repr__(self):
        return unicode(self)

    def json(self, data=None):
        """ Return a JSON-serializable dict describing this Job.

        :param data: optional payload stored under the 'data' key.
        """
        return {
            'id': self.id,
            'handle': self.handle,
            'progress': self.progress,
            'status': self.status,
            'message': self.message,
            'exception': self.exception,
            'data': data,
        }

    @property
    def progress(self):
        """ Current progress as an int, as reported by progressd. """
        progress = progressd.get(self.id, 'progress')
        logger.debug('***PROGRESS: %s: %s' % (self.id, progress))
        return int(progress)

    @property
    def is_child(self):
        """ True if this job has a parent job. """
        return self.parent is not None

    @property
    def is_follower(self):
        """ True if this job is linked to a master job. """
        return self.master is not None

    def reference(self, message=""):
        """ Increment the stored refcount of this job by one.

        :param message: description of the caller; not used by the
            current implementation.
        """
        with TransactionLock(self, '%s.reference' % self):
            # F() makes the increment happen in the database rather than
            # on a possibly stale in-memory value.
            Job.objects.filter(pk=self.pk).update(
                refcount=F('refcount') + 1)

    def dereference(self, message=""):
        """ Decrement the stored refcount of this job by one.

        :param message: description of the caller; not used by the
            current implementation.
        """
        with TransactionLock(self, '%s.dereference' % self):
            Job.objects.filter(pk=self.pk).update(
                refcount=F('refcount') - 1)

    def refresh(self):
        """ Refresh dynamic job parameters from the database. """
        # fix bug 227119, by avoiding mysql caching problems
        # http://stackoverflow.com/a/7028362
        # should be fixed in Django 1.6
        # XXXCJ -- can we drop this now?
        Job.objects.update()

        job = Job.objects.get(pk=self.pk)
        for k in ['status', 'message', 'exception', 'actual_criteria',
                  'touched', 'refcount', 'callback', 'parent']:
            setattr(self, k, getattr(job, k))

    def safe_update(self, **kwargs):
        """ Update the job with the passed dictionary in a database safe way.

        This method updates only the requested parameters and refreshes
        the rest from the database.  This should be used for all updates
        to Jobs to ensure that unmodified keys are not accidentally
        clobbered by doing a blanket job.save().
        """
        logger.debug("%s safe_update %s" % (self, kwargs))

        with TransactionLock(self, '%s.safe_update' % str(self)):
            Job.objects.filter(pk=self.pk).update(**kwargs)
            self.refresh()

    @classmethod
    def create(cls, table, criteria, update_progress=True, parent=None):
        """ Create, save and return a new Job for `table`.

        The criteria are first adjusted for the table and a handle is
        computed.  If the manager finds a master job for that handle the
        new job is linked to it and the master is referenced and touched.
        The job is also registered with progressd.

        :param table: Table the job will run.
        :param criteria: criteria object; must provide build_for_table().
        :param update_progress: stored on the job as `update_progress`.
        :param parent: optional parent Job.
        """
        # Adjust the criteria for this specific table, locking
        # down start/end times as needed
        criteria = criteria.build_for_table(table)
        try:
            criteria.compute_times()
        except ValueError:
            # Ignore errors, this table may not have start/end times
            pass

        # Compute the handle -- this will take into account
        # cacheability
        handle = Job._compute_handle(table, criteria)

        # Grab a lock on the row associated with the table
        with TransactionLock(table, "Job.create"):
            # Look for another job by the same handle in any state
            # except ERROR
            master = Job.objects.get_master(handle)

            job = Job(table=table,
                      criteria=criteria,
                      actual_criteria=None,
                      status=Job.NEW,
                      pid=os.getpid(),
                      handle=handle,
                      parent=parent,
                      master=master,
                      update_progress=update_progress,
                      message='',
                      exception='')
            job.save()

            if master:
                master.reference("Master link from job %s" % job)
                now = datetime.datetime.now(tz=pytz.utc)
                master.safe_update(touched=now)

                logger.info("%s: New job for table %s, linked to master %s"
                            % (job, table.name, master))
            else:
                logger.info("%s: New job for table %s" % (job, table.name))

            # Create new instance in progressd as part of same Transaction
            p = {
                'job_id': job.id,
                'status': job.status,
                'progress': 0,
                'master_id': job.master.id if job.master else 0,
                'parent_id': job.parent.id if job.parent else 0,
            }
            logger.debug('***Creating Job to progressd: %s' % p)
            progressd.post(**p)

            # End of TransactionLock

        logger.debug("%s: criteria = %s" % (job, criteria))

        return job

    def start(self, method=None, method_args=None):
        """ Start this job.

        A follower job does no work of its own: it mirrors a master that
        is already COMPLETE or ERROR and returns.  Otherwise a Task is
        created to run `method` (default: the table's queryclass.run).

        :param method: callable to run instead of the default.
        :param method_args: arguments handed to Callable with `method`.
        """
        with TransactionLock(self.table, '%s.start' % self):
            logger.info("%s: Job starting" % self)
            self.refresh()

            if self.is_follower:
                logger.debug("%s: Shadowing master job %s" %
                             (self, self.master))
                if self.master.status == Job.COMPLETE:
                    self.mark_complete()
                elif self.master.status == Job.ERROR:
                    self.mark_error(self.master.message,
                                    self.master.exception)

                return

        if method is None:
            method = self.table.queryclass.run

        # Create a task to do the work
        task = Task(self, Callable(method, method_args))
        logger.debug("%s: Created task %s" % (self, task))
        task.start()

    def schedule(self, jobs, callback):
        """ Store `callback` on this job, then start every job in `jobs`.

        :param jobs: dict mapping names to Job instances.
        :param callback: callable wrapped in Callable and saved as this
            job's callback.
        """
        logger.debug("%s: Setting callback %s" % (self, callback))
        self.safe_update(callback=Callable(callback))
        logger.debug("%s: Done setting callback %s" % (self, self.callback))

        for job in jobs.itervalues():
            job.start()

    def check_children(self, objlock=None):
        """ Run the stored callback once no child is NEW or RUNNING.

        :param objlock: object to lock while querying children; defaults
            to this job.
        """
        # get a lock on the child that's called us to ensure any status
        # from its transaction will be seen.
        if objlock is None:
            objlock = self

        with TransactionLock(objlock, '%s.checking_children' % self):
            running_children = Job.objects.filter(
                parent=self, status__in=[Job.NEW, Job.RUNNING])

        logger.info("%s: %d running children" % (self, len(running_children)))
        logger.debug("%s: all children: %s" %
                     (self, ';'.join('%s - %s' % (j.status, j) for j in
                                     Job.objects.filter(parent=self))))

        if len(running_children) > 0:
            # Not done yet, do nothing
            return

        # Grab a lock on this job to make sure only one caller
        # gets the callback
        with TransactionLock(self, '%s.check_children' % self):
            # Now that we have the lock, make sure we have latest Job
            # details
            self.refresh()

            logger.info("%s: checking callback %s" % (self, self.callback))
            if self.callback is None:
                # Some other child got to it first
                return

            # Save off the callback, we'll call it outside the transaction
            callback = self.callback

            # Clear the callback while still in lockdown
            self.callback = None
            self.save()

        t = Task(self, callback=callback)
        logger.info("%s: Created callback task %s" % (self, t))
        t.start()

    def done(self):
        """ Return True if progressd reports this job COMPLETE or ERROR.

        Updates `self.status` from progressd, and refreshes the remaining
        dynamic fields from the database when the job is done.
        """
        self.status = int(progressd.get(self.id, 'status'))
        finished = self.status in (Job.COMPLETE, Job.ERROR)
        if finished:
            self.refresh()

        return finished

    def mark_progress(self, progress, status=None):
        """ Report progress (and status) for this job to progressd.

        :param progress: numeric value or numeric string; truncated to
            an int.
        :param status: status to report, defaults to Job.RUNNING.

        An RvbdHTTPException from progressd is logged and not re-raised.
        """
        if status is None:
            status = Job.RUNNING
        logger.debug('***SAVING PROGRESS for %s: %s/%s' %
                     (self.id, status, progress))
        progress = int(float(progress))
        try:
            progressd.put(self.id, status=status, progress=progress)
        except RvbdHTTPException as e:
            logger.debug('***Error saving progress for %s: %s' % (self.id, e))

    def mark_done(self, status, **kwargs):
        """ Save a final status, then notify followers and the parent.

        :param status: new status value to store.
        :param kwargs: additional attributes to set on the job before
            saving.
        :returns: True when the status was applied; None when the job was
            already COMPLETE or ERROR and nothing was done.
        """
        with TransactionLock(self, '%s.mark_done' % self):
            self.refresh()
            old_status = self.status
            if old_status in (Job.COMPLETE, Job.ERROR):
                # Status was already set to a done state, avoid
                # double action and return now
                return
            self.status = status
            for k, v in kwargs.iteritems():
                setattr(self, k, v)
            self.save()

        # On status change, do more...
        self.mark_progress(status=status, progress=100)

        if not self.is_follower:
            # Notify followers of this job
            followers = Job.objects.filter(master=self)
            for follower in followers:
                if self.status == Job.COMPLETE:
                    kwargs['actual_criteria'] = self.actual_criteria
                    follower.mark_complete(status=status, **kwargs)
                elif self.status == Job.ERROR:
                    follower.mark_done(status=status, **kwargs)

        if self.parent:
            logger.info("%s: Asking parent %s to check children" %
                        (self, self.parent))
            t = Task(self.parent,
                     callback=Callable(self.parent.check_children,
                                       called_kwargs={'objlock': self}),
                     generic=True)
            logger.info("%s: Created check_children task %s" % (self, t))
            t.start()

        return True

    def mark_complete(self, data=None, **kwargs):
        """ Save `data` (if given), mark the job COMPLETE and send the
        post_data_save signal.

        :param data: query result handed to _save_data(); skipped when
            None.
        :param kwargs: extra attributes forwarded to mark_done(); 'status'
            and 'message' are always overwritten here.
        """
        logger.info("%s: complete" % self)
        if data is not None:
            self._save_data(data)

        kwargs['status'] = Job.COMPLETE
        kwargs['message'] = ''
        if (self.actual_criteria is None and
                'actual_criteria' not in kwargs):
            kwargs['actual_criteria'] = self.criteria

        self.mark_done(**kwargs)
        logger.info("%s: saved as COMPLETE" % self)

        # Send signal for possible Triggers
        # NOTE(review): `self.data` is passed as the bound method, not its
        # result -- presumably so receivers can load data lazily; confirm.
        post_data_save.send(sender=self,
                            data=self.data,
                            context={'job': self})

    def mark_error(self, message, exception=None):
        """ Mark the job as ERROR and send the error signal.

        :param message: error message stored on the job.
        :param exception: exception text; stored as '' when None.
        """
        if exception is None:
            exception = ''

        logger.warning("%s failed: %s" % (self, message))

        self.mark_done(status=Job.ERROR,
                       message=message,
                       exception=exception)
        logger.info("%s: saved as ERROR" % self)

        # Send signal for possible Triggers
        error_signal.send(sender=self,
                          context={'job': self})

    @classmethod
    def _compute_handle(cls, table, criteria):
        """ Return an md5 hex digest used as the job handle.

        For a cacheable table (and criteria not ignoring the cache) the
        digest covers the table id, its column names and the criteria
        items; otherwise the current time and a random value are mixed in
        to make the handle unique.
        """
        h = hashlib.md5()
        h.update(str(table.id))

        if table.cacheable and not criteria.ignore_cache:
            # XXXCJ - Drop ephemeral columns when computing the cache handle,
            # since the list of columns is modified at run time.   Typical
            # use case is an analysis table which creates a time-series
            # graph of the top 10 hosts -- one column per host.  The host
            # columns will change based on the run of the dependent table.
            #
            # Including ephemeral columns causes some problems because the
            # handle is computed before the query is actually run, so it
            # never matches.
            #
            # May want to dig in to this further and make sure this doesn't
            # pick up cache files when we don't want it to
            h.update('.'.join([c.name for c in table.get_columns()]))

            if table.criteria_handle_func:
                criteria = table.criteria_handle_func(criteria)

            for k, v in criteria.iteritems():
                h.update('%s:%s' % (k, v))
        else:
            # Table is not cacheable, instead use current time plus a random
            # value just to get a unique hash
            h.update(str(datetime.datetime.now()))
            h.update(str(random.randint(0, 10000000)))

        return h.hexdigest()

    def get_columns(self, ephemeral=None, **kwargs):
        """ Return columns associated with the table for the job.

        The returned column set includes ephemeral columns associated
        with this job unless ephemeral is set to False.

        Note that only ``ephemeral=None`` has an effect here: it selects
        the master job (or this job).  Any other value is not forwarded
        to the table's get_columns().
        """
        if ephemeral is None:
            kwargs['ephemeral'] = self.master or self
        return self.table.get_columns(**kwargs)

    def _save_data(self, data):
        """ Convert `data` to a DataFrame, post-process it and pickle it.

        :param data: a list of rows, a pandas.DataFrame, or None / an
            empty list for "no data".
        :returns: the processed DataFrame, or None when there is no data.
        :raises ValueError: if `data` is of any other type.
        """
        if isinstance(data, list) and len(data) > 0:
            # Convert the result to a dataframe
            columns = [col.name for col in
                       self.get_columns(synthetic=False)]
            df = pandas.DataFrame(data, columns=columns)
        elif ((data is None) or
              (isinstance(data, list) and len(data) == 0)):
            df = None
        elif isinstance(data, pandas.DataFrame):
            df = data
        else:
            raise ValueError("Unrecognized query result type: %s" %
                             type(data))

        if df is not None:
            self.check_columns(df)
            df = self.normalize_types(df)
            df = self.table.compute_synthetic(self, df)

            # Sort according to the defined sort columns
            if self.table.sortcols:
                ordered = df.sort_values(
                    self.table.sortcols,
                    ascending=[b == Table.SORT_ASC
                               for b in self.table.sortdir])
                # Move NaN rows of the first sortcol to the end
                n = self.table.sortcols[0]
                df = (ordered[ordered[n].notnull()]
                      .append(ordered[ordered[n].isnull()]))

            if self.table.rows > 0:
                df = df[:self.table.rows]

        if df is not None:
            df.to_pickle(self.datafile())
            logger.debug("%s data saved to file: %s" %
                         (str(self), self.datafile()))
        else:
            logger.debug("%s no data saved, data is empty" %
                         (str(self)))

        return df

    def datafile(self):
        """ Return the data file for this job. """
        return os.path.join(settings.DATA_CACHE,
                            "job-%s.data" % self.handle)

    def data(self):
        """ Returns a pandas.DataFrame of data, or None if not available.

        :raises DataError: if the job is not yet done.

        Any exception raised while loading the data file is logged and
        re-raised; the reference taken for the duration of the load is
        always released.
        """
        if not self.done():
            logger.warning("%s: job not complete, no data available" % self)
            raise DataError("Job not complete, no data available")

        self.reference("data()")

        try:
            logger.debug("%s looking for data file: %s" %
                         (str(self), self.datafile()))
            if os.path.exists(self.datafile()):
                df = pandas.read_pickle(self.datafile())
                logger.debug("%s data loaded %d rows from file: %s" %
                             (str(self), len(df), self.datafile()))
            else:
                logger.debug("%s no data, missing data file: %s" %
                             (str(self), self.datafile()))
                df = None
        except Exception as e:
            logger.error("Error loading datafile %s for %s" %
                         (self.datafile(), str(self)))
            logger.error("Traceback:\n%s" % e)
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            self.dereference("data()")

        return df

    def values(self):
        """ Return data as a list of lists.

        NaN values are replaced with None and numpy scalars are converted
        to native Python values.  Returns [] when there is no data.
        """
        df = self.data()
        if df is not None:
            # Replace NaN with None
            df = df.where(pandas.notnull(df), None)

            # Extract the values in the right order
            all_columns = self.get_columns()
            all_col_names = [c.name for c in all_columns]

            # Straggling numpy data types may cause problems
            # downstream (json encoding, for example), so strip
            # things down to just native ints and floats
            vals = []
            for row in df.ix[:, all_col_names].itertuples():
                vals_row = []
                # row[0] is the index, the column values follow
                for v in row[1:]:
                    if isinstance(v, (numpy.number, numpy.bool_)):
                        v = numpy.asscalar(v)
                    vals_row.append(v)
                vals.append(vals_row)
        else:
            vals = []

        return vals

    def check_columns(self, df):
        """ Raise ValueError if `df` lacks any non-synthetic column. """
        for col in self.get_columns(synthetic=False):
            if col.name not in df:
                raise ValueError(
                    'Returned table missing expected column: %s' % col.name)

    def normalize_types(self, df):
        """ Coerce time, date and numeric columns of `df` in place.

        :param df: DataFrame containing all non-synthetic columns.
        :returns: the same DataFrame object.
        """
        for col in self.get_columns(synthetic=False):
            s = df[col.name]
            if col.istime():
                # The column is supposed to be time,
                # make sure all values are datetime objects
                if str(s.dtype).startswith(
                        str(pandas.np.dtype('datetime64'))):
                    # Already a datetime
                    pass
                elif str(s.dtype).startswith('int'):
                    # Assume this is a numeric epoch, convert to datetime
                    df[col.name] = s.astype('datetime64[s]')
                elif str(s.dtype).startswith('float'):
                    # This is a numeric epoch as a float, possibly
                    # has subsecond resolution, convert to
                    # datetime but preserve up to millisecond
                    df[col.name] = (1000 * s).astype('datetime64[ms]')
                else:
                    # Possibly datetime object or a datetime string,
                    # hopefully astype() can figure it out
                    df[col.name] = s.astype('datetime64[ms]')

                # Make sure we are UTC, must use internal tzutc because
                # pytz timezones will cause an error when unpickling
                # https://github.com/pydata/pandas/issues/6871
                # -- problem appears solved with latest pandas
                utc = pytz.utc
                try:
                    df[col.name] = df[col.name].apply(
                        lambda x: x.tz_localize(utc))
                except TypeError as e:
                    # str(e) rather than e.message: the latter only
                    # exists on Python 2 exceptions.
                    if str(e).startswith('Cannot localize'):
                        df[col.name] = df[col.name].apply(
                            lambda x: x.tz_convert(utc))
                    else:
                        raise

            elif col.isdate():
                if str(s.dtype).startswith(
                        str(pandas.np.dtype('datetime64'))):
                    # Already a datetime
                    pass
                elif str(s.dtype).startswith('int'):
                    # Assume this is a numeric epoch, convert to datetime
                    df[col.name] = s.astype('datetime64[s]')
                elif str(s.dtype).startswith('float'):
                    # This is a numeric epoch as a float, possibly
                    # has subsecond resolution, convert to
                    # datetime but preserve up to millisecond
                    df[col.name] = (1000 * s).astype('datetime64[ms]')
                elif str(s.dtype).startswith('object'):
                    # This is a datetime.date object, convert it to
                    # datetime64[ns], tried to obtain datetime64[s]
                    # by setting unit='s' but failed
                    # It seems datetime64[ns] is accepted.
                    df[col.name] = pandas.to_datetime(s)
                else:
                    # Possibly datetime object or a datetime string,
                    # hopefully astype() can figure it out
                    df[col.name] = s.astype('datetime64[ms]')

            elif (col.isnumeric() and
                  s.dtype == pandas.np.dtype('object')):
                # The column is supposed to be numeric but must have
                # some strings.  Try replacing empty strings with NaN
                # and see if it converts to float64
                try:
                    df[col.name] = (s.replace('', pandas.np.NaN)
                                    .astype(pandas.np.float64))
                except ValueError:
                    # This may incorrectly be tagged as numeric
                    pass

        return df

    def combine_filterexprs(self, joinstr="and", exprs=None):
        """ Combine `exprs` with the table's filter expression.

        Empty-string and None expressions are dropped.  Two or more
        remaining expressions are each wrapped in parentheses and joined
        with `joinstr`; a single one is returned as is; none yields "".

        :param joinstr: operator placed between expressions.
        :param exprs: a single expression or a list of expressions; the
            passed list is not modified.
        """
        self.refresh()

        if exprs is None:
            exprs = []
        elif not isinstance(exprs, list):
            exprs = [exprs]
        else:
            # Work on a copy so the caller's list is left untouched
            exprs = list(exprs)

        exprs.append(self.table.filterexpr)

        nonnull_exprs = [e for e in exprs if e != "" and e is not None]

        if len(nonnull_exprs) > 1:
            return "(" + (") " + joinstr + " (").join(nonnull_exprs) + ")"
        elif len(nonnull_exprs) == 1:
            return nonnull_exprs[0]
        else:
            return ""