Code example #1
class Event(models.Model):
    """Event instance which may result in one or more Alerts."""
    timestamp = models.DateTimeField(auto_now=True)
    eventid = UUIDField(default=uuid.uuid4, editable=False)
    severity = models.IntegerField(validators=[MinValueValidator(0),
                                               MaxValueValidator(100)])
    log_message = models.TextField(null=True, blank=True)
    context = PickledObjectField()
    trigger_result = PickledObjectField()

    def __unicode__(self):
        return '<Event %s/%s (%s)>' % (self.id, self.eventid, self.timestamp)

    def __repr__(self):
        return unicode(self)

    def get_details(self):
        """Return details in a string"""
        msg = []
        fmt = '{0:15}: {1}'
        msg.append(fmt.format('ID', self.id))
        msg.append(fmt.format('EventID', self.eventid))
        msg.append(fmt.format('Severity', self.severity))
        msg.append(fmt.format('Timestamp', self.timestamp))
        msg.append(fmt.format('Log Message', self.log_message))
        msg.append(fmt.format('Trigger Result', self.trigger_result))
        msg.append(fmt.format('Context', self.context))

        alerts = self.alert_set.all()
        if alerts:
            msg.append('')
            msg.append('Associated Alerts:')
            for a in alerts:
                msg.append(a.get_details())
        return '\n'.join(msg)
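
The sketch below shows how an Event might be created and inspected. It is a hypothetical usage example, not project code: it assumes a configured Django environment, and the module path alerting.models is an assumption.

# Hypothetical usage sketch (import path assumed; requires a configured
# Django environment with these models installed).
from alerting.models import Event

event = Event(severity=50,
              log_message='disk usage above threshold',
              context={'device': 'router1'},
              trigger_result={'value': 93})
event.save()

# get_details() renders one "label: value" line per field, followed by
# details of any associated Alerts.
print(event.get_details())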
Code example #2
class ErrorHandler(models.Model):
    """Special alert which bypasses triggers and gets immediately routed.

    The template/template_func attributes need to be provided to create
    an associated Destination object, and only one Destination can be defined
    per ErrorHandler.

    One ErrorHandler should be defined for each desired route.

    """
    name = models.CharField(max_length=100)
    source = PickledObjectField()
    destination = models.ForeignKey('Destination')
    allow_global = models.BooleanField(default=False)

    def __unicode__(self):
        return '<ErrorHandler %d/%s>' % (self.id, self.name)

    @classmethod
    def create(cls, name, source, sender, options=None,
               template=None, template_func=None, allow_global=False):
        """Create new ErrorHandler and its associated Destination."""
        destination = Destination.create(sender, options,
                                         template, template_func)

        # when called via Trigger classmethod source has already been encoded
        if not isinstance(source, frozenset):
            source = Source.encode(source)

        e = ErrorHandler(name=name, source=source, destination=destination,
                         allow_global=allow_global)
        e.save()
        return e
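
As the docstring notes, ErrorHandler.create() builds the associated Destination from the sender plus a template or template_func. A hypothetical call is sketched below; the sender name 'LoggingSender', the template context keys, and the assumption that source is an existing Table object are illustrative only.

# Hypothetical sketch; assumes a configured Django environment, an existing
# Table object `table`, and a sender class named 'LoggingSender'.
from alerting.models import ErrorHandler   # import path assumed

handler = ErrorHandler.create(
    name='tableErrorHandler',
    source=table,                 # encoded internally via Source.encode()
    sender='LoggingSender',       # sender name is an assumption
    template='Error while running {job}: {message}',   # context keys assumed
    allow_global=True)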
Code example #3
class Alert(models.Model):
    """Individual notification sent by a Sender for a specific Event."""
    timestamp = models.DateTimeField(auto_now=True)
    event = models.ForeignKey('Event', related_name='alerts')
    level = models.CharField(max_length=50, choices=AlertLevels.get_choices())
    sender = models.CharField(max_length=100)
    options = PickledObjectField(blank=True, null=True)
    message = models.TextField()

    def __unicode__(self):
        msg = self.message
        if len(msg) > 20:
            msg = '%s...' % msg[:20]
        return '<Alert %s (%s/%s)>' % (self.id or 'X',
                                       self.sender, msg)

    def __repr__(self):
        return unicode(self)

    def get_details(self):
        """Return details in a string"""
        msg = []
        fmt = '{0:15}: {1}'
        msg.append(fmt.format('ID', self.id))
        msg.append(fmt.format('EventID', self.event.eventid))
        msg.append(fmt.format('Timestamp', self.timestamp))
        msg.append(fmt.format('Level', self.level))
        msg.append(fmt.format('Sender', self.sender))
        msg.append(fmt.format('Dest options', self.options))
        msg.append(fmt.format('Message', self.message))
        return '\n'.join(msg)
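
The __unicode__ method shortens long messages before embedding them in the representation. A standalone restatement of that truncation rule:

# Standalone sketch of the truncation used in Alert.__unicode__(): messages
# longer than 20 characters are cut and suffixed with '...'.
def truncate(msg, limit=20):
    return '%s...' % msg[:limit] if len(msg) > limit else msg

print(truncate('short message'))                     # unchanged
print(truncate('a rather long alert message body'))  # 'a rather long alert ...'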
Code example #4
File: models.py  Project: gwenblum/steelscript-appfwk
class ExistingIntervals(models.Model):
    """Store the existing time intervals in db for each table and
    a set of criteria fields (represented by the table_handle field).
    """
    namespace = models.CharField(max_length=20)

    sourcefile = models.CharField(max_length=200)

    table = models.CharField(max_length=50)

    criteria = PickledObjectField(null=True)

    table_handle = models.CharField(max_length=100, default="")

    intervals = PickledObjectField(null=True)

    tzinfo = PickledObjectField()
Code example #5
class ExistingIntervals(models.Model):
    """Store the existing time intervals in db for each table and
    a set of criteria fields (represented by the table_handle field).
    """
    namespace = models.CharField(max_length=20)
    sourcefile = models.CharField(max_length=200)
    table = models.CharField(max_length=50)
    criteria = PickledObjectField(null=True)
    table_handle = models.CharField(max_length=100, default="")
    intervals = PickledObjectField(null=True)
    tzinfo = PickledObjectField()

    def __unicode__(self):
        return "<ExistingIntervals %s/%s - %s>" % (self.id, self.table_handle,
                                                   self.intervals)

    def __repr__(self):
        return unicode(self)
Code example #6
class WidgetAuthToken(models.Model):
    """ Authentication token for each user per widget per report """

    token = models.CharField(max_length=200)
    user = models.ForeignKey(AppfwkUser)
    pre_url = models.CharField(max_length=200, verbose_name='URL')
    criteria = PickledObjectField()
    edit_fields = SeparatedValuesField(null=True)
    touched = models.DateTimeField(auto_now=True,
                                   verbose_name='Last Time used')

    def __unicode__(self):
        return ("<Token %s, User %s, pre_url %s>" %
                (self.token, self.user, self.pre_url))
Code example #7
File: models.py  Project: tagur87/steelscript-appfwk
class Destination(models.Model):
    name = models.CharField(max_length=100)
    sender = models.CharField(max_length=100)
    options = PickledObjectField(blank=True, null=True)
    template = models.TextField(blank=True, null=True)
    template_func = FunctionField(null=True)

    def __unicode__(self):
        if self.options:
            return '<Destination %d/%s -> %s>' % (self.id, self.sender,
                                                  str(self.options))
        else:
            return '<Destination %d/%s>' % (self.id, self.sender)

    def save(self, *args, **kwargs):
        if self.template is None and self.template_func is None:
            msg = ('Missing template or template_func definition in '
                   'Destination creation for Destination %s' % self)
            raise AttributeError(msg)
        super(Destination, self).save()

    @classmethod
    def create(cls, sender, options=None, template=None, template_func=None):
        r = Destination(sender=sender,
                        options=options,
                        template=template,
                        template_func=template_func)
        r.save()
        return r

    def get_sender(self):
        """Return instance of Sender associated with the model.
        """
        return find_sender(self.sender)()

    def get_message(self, context):
        """Return string from either template_func or template
        processed with result and given context.
        """
        logger.debug('XXX here - get_message context keys/template/func: '
                     '%s/%s/%s' %
                     (context.keys(), self.template, self.template_func))
        if self.template_func:
            try:
                return self.template_func(self, **context)
            except Exception as e:
                logger.error('Error processing template function: %s' % e)
        else:
            return self.template.format(**context)
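
Destination.get_message() takes one of two paths: call template_func with the context, or fill the template format string with it. The standalone sketch below reproduces both paths outside of Django; the context keys are assumptions for illustration.

# Standalone sketch of the two message paths in Destination.get_message().
context = {'job': 'job-42', 'value': 97}

# Path 1: a plain format-string template filled via str.format(**context)
template = 'Threshold crossed on {job}: value={value}'
print(template.format(**context))

# Path 2: a template_func called with the destination and the same context
def template_func(destination, **ctx):
    return 'ALERT [%s] value=%s' % (ctx['job'], ctx['value'])

print(template_func(None, **context))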
Code example #8
class Trigger(models.Model):
    name = models.CharField(max_length=100)
    source = PickledObjectField()
    trigger_func = FunctionField()
    destinations = models.ManyToManyField('Destination')

    def save(self, *args, **kwargs):
        if not self.name:
            self.name = 'trigger_' + str(hash(self.source))
        super(Trigger, self).save(*args, **kwargs)
        TriggerCache.clear()

    def delete(self, *args, **kwargs):
        TriggerCache.clear()
        super(Trigger, self).delete(*args, **kwargs)

    @classmethod
    def create(cls, source, trigger_func, params=None, **kwargs):
        """Create trigger against given source table.

        :param table source: Table object reference
        :param function trigger_func: function object to run for trigger
        :param dict params: optional additional parameters to pass to
            trigger_func
        """
        tfunc = Function(trigger_func, params=params)
        t = Trigger(name=kwargs.pop('name', Source.name(source)),
                    source=Source.encode(source),
                    trigger_func=tfunc,
                    **kwargs)
        t.save()
        return t

    def add_destination(self, sender, options=None,
                        template=None, template_func=None):
        """Assign destination to the given Trigger.

        :param str sender: name of sender class to use
        :param dict options: optional dictionary of attributes
        :param str template: format string to use for resulting alert
        :param function template_func: optional function which returns a
            formatted string, receives same context as template
        """
        r = Destination.create(sender=sender,
                               options=options,
                               template=template,
                               template_func=template_func)
        self.destinations.add(r)

    def add_error_handler(self, sender, options=None,
                          template=None, template_func=None,
                          allow_global=False):
        """Convenience method to create error handler for same source."""
        e = ErrorHandler.create(name=self.name + 'ErrorHandler',
                                source=self.source,
                                sender=sender,
                                options=options,
                                template=template,
                                template_func=template_func,
                                allow_global=allow_global)

        return e
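
A hypothetical end-to-end wiring of a Trigger is sketched below. It assumes a configured Django environment, an existing Table object `table`, and a sender named 'LoggingSender'; the trigger_func signature and the template context keys are assumptions as well.

# Hypothetical wiring sketch (import path, sender name, signatures assumed).
from alerting.models import Trigger

def high_severity(df, **params):
    # Assumed runtime signature: receives the job's data plus the params
    # dict given to Trigger.create(); a truthy result fires the trigger.
    return df[df['severity'] > params['threshold']]

trigger = Trigger.create(source=table,
                         trigger_func=high_severity,
                         params={'threshold': 90})
trigger.add_destination(sender='LoggingSender',
                        template='High severity rows found on {job}')
trigger.add_error_handler(sender='LoggingSender',
                          template='Trigger failed on {job}: {message}')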
Code example #9
class Widget(models.Model):
    """ Defines a UI widget and the source datatables
    """
    tables = models.ManyToManyField(Table)
    section = models.ForeignKey(Section)
    title = models.CharField(max_length=100)
    row = models.IntegerField()
    col = models.IntegerField()
    width = models.IntegerField(default=1)
    height = models.IntegerField(default=300)
    rows = models.IntegerField(default=-1)
    options = PickledObjectField()

    module = models.CharField(max_length=100)
    uiwidget = models.CharField(max_length=100)
    uioptions = PickledObjectField()

    # not globally unique, but should be sufficiently unique within a report
    slug = models.SlugField(max_length=100)

    objects = InheritanceManager()

    def __repr__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def __unicode__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def save(self, *args, **kwargs):
        self.slug = '%s-%d-%d' % (slugify(self.title), self.row, self.col)
        super(Widget, self).save(*args, **kwargs)

    def get_definition(self, criteria):
        """Get dict of widget attributes for sending via JSON."""
        report = self.section.report

        widget_def = {
            "widgettype": self.widgettype().split("."),
            "posturl": reverse('widget-job-list',
                               args=(report.namespace, report.slug,
                                     self.slug)),
            "updateurl": reverse('widget-criteria',
                                 args=(report.namespace, report.slug,
                                       self.slug)),
            "options": self.uioptions,
            "widgetid": self.id,
            "widgetslug": self.slug,
            "row": self.row,
            "width": self.width,
            "height": self.height,
            "criteria": criteria,
        }

        return widget_def

    def widgettype(self):
        return '%s.%s' % (self.module.split('.')[-1], self.uiwidget)

    def table(self, i=0):
        return self.tables.all()[i]

    def compute_row_col(self):
        rowmax = self.section.report.widgets().aggregate(Max('row'))
        row = rowmax['row__max']
        if row is None:
            row = 1
            col = 1
        else:
            widthsum = (self.section.report.widgets().filter(
                row=row).aggregate(Sum('width')))
            width = widthsum['width__sum']
            if width + self.width > 12:
                row += 1
                col = 1
            else:
                col = width + 1
        self.row = row
        self.col = col

    def collect_fields(self):
        # Gather up all fields
        fields = SortedDict()

        # All fields attached to the section's report
        for f in self.section.report.fields.all().order_by('id'):
            fields[f.keyword] = f

        # All fields attached to the section
        for f in self.section.fields.all().order_by('id'):
            if f.keyword not in fields:
                fields[f.keyword] = f

        # All fields attached to any Widget's Tables
        for w in self.section.widget_set.all().order_by('id'):
            for t in w.tables.all():
                for f in t.fields.all().order_by('id'):
                    if f.keyword not in fields:
                        fields[f.keyword] = f

        return fields
Code example #10
File: models.py  Project: gwenblum/steelscript-appfwk
class Widget(models.Model):
    """ Defines a UI widget and the source datatables
    """
    tables = models.ManyToManyField(Table)
    section = models.ForeignKey(Section)
    title = models.CharField(max_length=100)
    row = models.IntegerField()
    col = models.IntegerField()
    width = models.IntegerField(default=6)

    # setting height of 0 will let widget box auto-size to resulting data
    height = models.IntegerField(default=300)
    rows = models.IntegerField(default=-1)
    options = PickledObjectField()

    module = models.CharField(max_length=100)
    uiwidget = models.CharField(max_length=100)
    uioptions = PickledObjectField()

    # not globally unique, but should be sufficiently unique within a report
    slug = models.SlugField(max_length=100)

    # widget to be stacked below the previous widget on the same row
    stack_widget = models.BooleanField(default=False)

    objects = InheritanceManager()

    def __repr__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def __unicode__(self):
        return '<Widget %s (%s)>' % (self.title, self.id)

    def save(self, *args, **kwargs):
        self.slug = '%s-%d-%d' % (slugify(self.title), self.row, self.col)
        super(Widget, self).save(*args, **kwargs)

    @classmethod
    def create(cls, *args, **kwargs):
        options = kwargs.pop('options', None)
        table = kwargs.pop('table', None)

        w = Widget(*args, **kwargs)
        w.compute_row_col()

        if options:
            w.options = JsonDict(options)

        w.save()

        if table:
            w.tables.add(table)

        return w

    def get_definition(self, criteria):
        """Get dict of widget attributes for sending via JSON."""
        report = self.section.report

        widget_def = {
            "widgettype": self.widgettype().split("."),
            "posturl": reverse('widget-job-list',
                               args=(report.namespace, report.slug,
                                     self.slug)),
            "updateurl": reverse('widget-criteria',
                                 args=(report.namespace, report.slug,
                                       self.slug)),
            "options": self.uioptions,
            "widgetid": self.id,
            "widgetslug": self.slug,
            "row": self.row,
            "width": self.width,
            "height": self.height,
            "criteria": criteria,
        }

        return widget_def

    def widgettype(self):
        return '%s.%s' % (self.module.split('.')[-1], self.uiwidget)

    def table(self, i=0):
        return self.tables.all()[i]

    def compute_row_col(self):
        rowmax = self.section.report.widgets().aggregate(Max('row'))
        row = rowmax['row__max']
        if row is None:
            row = 1
            col = 1
        elif self.stack_widget:
            # This widget needs to be stacked below the previous widget
            pre_w = self.section.report.widgets().order_by('-row', '-col')[0]
            if pre_w.width != self.width:
                raise ValueError("The stack widget with title '%s' should set "
                                 "with width %s." % (self.title, pre_w.width))
            elif pre_w.title.lower() == self.title.lower():
                raise ValueError("The stack widget title '%s' is the same as "
                                 "the previous widget, thus should be "
                                 "changed." % self.title)
            row = pre_w.row
            col = pre_w.col
        else:
            widthsum = (self.section.report.widgets().filter(
                row=row).aggregate(Sum('width')))
            width = widthsum['width__sum']
            if width + self.width > 12:
                row += 1
                col = 1
            else:
                col = width + 1

        self.row = row
        self.col = col

    def collect_fields(self):
        # Gather up all fields
        fields = OrderedDict()

        # All fields attached to the section's report
        for f in self.section.report.fields.all().order_by('id'):
            fields[f.keyword] = f

        # All fields attached to the section
        for f in self.section.fields.all().order_by('id'):
            if f.keyword not in fields:
                fields[f.keyword] = f

        # All fields attached to any Widget's Tables
        for w in self.section.widget_set.all().order_by('id'):
            for t in w.tables.all():
                for f in t.fields.all().order_by('id'):
                    if f.keyword not in fields:
                        fields[f.keyword] = f

        return fields
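
The placement logic in compute_row_col() amounts to filling a 12-unit-wide grid left to right and wrapping to a new row when a widget no longer fits, with stacked widgets reusing the previous widget's position. The standalone sketch below (not project code) restates that rule:

# Standalone sketch of the 12-column placement rule in compute_row_col().
def place(existing, width, stack=False):
    """existing: list of (row, col, width) already placed, in order."""
    if not existing:
        return 1, 1
    if stack:
        row, col, _ = existing[-1]          # stack below the previous widget
        return row, col
    row = max(r for r, _, _ in existing)    # last row in use
    used = sum(w for r, _, w in existing if r == row)
    if used + width > 12:                   # does not fit: wrap to a new row
        return row + 1, 1
    return row, used + 1

layout = []
for w in (6, 6, 6):                 # two 6-wide widgets fill row 1,
    row, col = place(layout, w)     # the third wraps to row 2
    layout.append((row, col, w))
print(layout)                       # [(1, 1, 6), (1, 7, 6), (2, 1, 6)]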
Code example #11
File: models.py  Project: gwenblum/steelscript-appfwk
class ReportHistory(models.Model):
    """ Define a record history of running report."""
    namespace = models.CharField(max_length=50)
    slug = models.CharField(max_length=50)
    bookmark = models.CharField(max_length=400)
    first_run = models.DateTimeField()
    last_run = models.DateTimeField()
    job_handles = models.TextField()
    user = models.CharField(max_length=50)
    criteria = PickledObjectField()
    run_count = models.IntegerField()

    status_choices = ((ReportStatus.NEW, "New"),
                      (ReportStatus.RUNNING, "Running"),
                      (ReportStatus.COMPLETE, "Complete"),
                      (ReportStatus.ERROR, "Error"))

    status = models.IntegerField(default=ReportStatus.NEW,
                                 choices=status_choices)

    @classmethod
    def create(cls, **kwargs):
        """ Create a new report history object and save it to database.
        :param str namespace: name of one set of report slugs
        :param str slug: the slug of the report
        :param str bookmark: the bookmark link of the report
        :param datetime first_run: Time when the report with the same criteria
          ran for the first time
        :param datetime last_run: Time when the report with the same criteria
          ran most recently
        :param str job_handles: comma separated job handle strings of the
          report
        :param str user: name of the user who ran the report
        :param dict criteria: criteria fields that the report is running with
        :param int run_count: the number of times the report has run with the
          same criteria
        :return: the created report history object
        """
        job_handles = kwargs.get('job_handles')
        try:
            rh_obj = cls.objects.get(job_handles=job_handles)
        except ObjectDoesNotExist:
            rh_obj = cls(**kwargs)
            rh_obj.save()
        else:
            with TransactionLock(rh_obj, '%s_create' % rh_obj):
                rh_obj.status = ReportStatus.NEW
                rh_obj.last_run = kwargs.get('last_run')
                rh_obj.run_count += 1
                rh_obj.save()
        return rh_obj

    def __unicode__(self):
        return ("<Report History %s %s/%s>" %
                (self.id, self.namespace, self.slug))

    def __repr__(self):
        return unicode(self)

    def update_status(self, status):
        if self.status != status:
            with TransactionLock(self, '%s.update_status' % self):
                self.status = status
                self.save()

    def format_ts(self, ts):
        ltime = timezone.localtime(ts)
        return ltime.strftime("%Y/%m/%d %H:%M:%S")

    @property
    def format_last_run(self):
        return self.format_ts(self.last_run)

    @property
    def format_first_run(self):
        return self.format_ts(self.first_run)

    @property
    def status_name(self):
        return self.status_choices[self.status][1]

    @property
    def criteria_html(self):
        # length of business_hours_weekends.
        # current longest field
        tr_line = '<tr><td><b>{0}</b>:&nbsp;</td><td>{1}</td></tr>'
        cprops = self.criteria.keys()
        cprops.sort()
        rstr = '<table>'
        for k in cprops:
            rstr += tr_line.format(k, self.criteria[k])
        rstr += '</table>'
        # logger.debug("criteria_html: {0}".format(rstr))
        return rstr
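
ReportHistory.create() keys records on job_handles: a first call inserts a row, and a later call with the same handles updates last_run and bumps run_count instead of inserting a duplicate. A hypothetical usage sketch (import path assumed, configured Django environment required):

# Hypothetical usage sketch; the import path is an assumption.
import datetime
import pytz
from report.models import ReportHistory

now = datetime.datetime.now(tz=pytz.utc)
kwargs = dict(namespace='default', slug='overall',
              bookmark='/report/default/overall/',
              first_run=now, last_run=now,
              job_handles='abc123,def456', user='admin',
              criteria={'duration': '1h'}, run_count=1)

ReportHistory.create(**kwargs)   # first run: a new record is saved
ReportHistory.create(**kwargs)   # same job_handles: run_count is incremented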
Code example #12
File: models.py  Project: tagur87/steelscript-appfwk
class Column(models.Model):

    table = models.ForeignKey(Table)
    name = models.CharField(max_length=300)
    label = models.CharField(max_length=300, null=True)
    position = models.IntegerField(default=1)
    options = PickledObjectField()

    iskey = models.BooleanField(default=False)

    synthetic = models.BooleanField(default=False)

    # Ephemeral columns are columns added to a table at run-time
    ephemeral = models.ForeignKey('jobs.Job', null=True)

    compute_post_resample = models.BooleanField(default=False)
    compute_expression = models.CharField(max_length=300)
    resample_operation = models.CharField(max_length=300, default='sum')

    DATATYPE_FLOAT = 0
    DATATYPE_INTEGER = 1
    DATATYPE_TIME = 2
    DATATYPE_STRING = 3
    DATATYPE_HTML = 4
    DATATYPE_DATE = 5
    DATATYPE_INTEGER64 = 6

    datatype = models.IntegerField(
        default=DATATYPE_FLOAT,
        choices=((DATATYPE_FLOAT, "float"),
                 (DATATYPE_INTEGER, "integer"),
                 (DATATYPE_TIME, "time"),
                 (DATATYPE_STRING, "string"),
                 (DATATYPE_HTML, "html"),
                 (DATATYPE_DATE, "date"),
                 (DATATYPE_INTEGER64, "integer64")))

    UNITS_NONE = 0
    UNITS_SECS = 1
    UNITS_MSECS = 2
    UNITS_BYTES = 3
    UNITS_BYTES_PER_SEC = 4
    UNITS_PCT = 5
    UNITS_BITS = 6
    UNITS_BITS_PER_SEC = 7
    UNITS_SECS_VERBOSE = 8
    UNITS_MSECS_VERBOSE = 9
    UNITS_BYTES_VERBOSE = 10
    UNITS_BYTES_PER_SEC_VERBOSE = 11
    UNITS_PCT_VERBOSE = 12
    UNITS_BITS_VERBOSE = 13
    UNITS_BITS_PER_SEC_VERBOSE = 14
    units = models.IntegerField(
        default=UNITS_NONE,
        choices=((UNITS_NONE, "none"),
                 (UNITS_SECS, "s"),
                 (UNITS_MSECS, "ms"),
                 (UNITS_BYTES, "B"),
                 (UNITS_BYTES_PER_SEC, "B/s"),
                 (UNITS_PCT, "pct"),
                 (UNITS_BITS, "b"),
                 (UNITS_BITS_PER_SEC, "b/s"),
                 (UNITS_SECS_VERBOSE, "seconds"),
                 (UNITS_MSECS_VERBOSE, "milliseconds"),
                 (UNITS_BYTES_VERBOSE, "bytes"),
                 (UNITS_BYTES_PER_SEC_VERBOSE, "bytes/second"),
                 (UNITS_PCT_VERBOSE, "percent"),
                 (UNITS_BITS_VERBOSE, "bits"),
                 (UNITS_BITS_PER_SEC_VERBOSE, "bits/second")))

    formatter = models.TextField(null=True, blank=True)

    # default options to populate options field
    COLUMN_OPTIONS = {}
    POS_MAX = 0

    def __unicode__(self):
        return "<Column %s (%s)>" % (str(self.id), self.name)

    def __repr__(self):
        return unicode(self)

    def save(self, *args, **kwargs):
        if self.label is None:
            self.label = self.name
        super(Column, self).save()

    @classmethod
    def create(cls,
               table,
               name,
               label=None,
               datatype=DATATYPE_FLOAT,
               units=UNITS_NONE,
               iskey=False,
               position=None,
               **kwargs):

        column_options = copy.deepcopy(cls.COLUMN_OPTIONS)

        keys = kwargs.keys()
        cp = dict((k, kwargs.pop(k)) for k in keys if k in column_options)
        column_options.update(**cp)

        if column_options:
            options = JsonDict(default=column_options)
        else:
            options = None

        keys = kwargs.keys()
        ckeys = [f.name for f in Column._meta.local_fields]
        col_kwargs = dict((k, kwargs.pop(k)) for k in keys if k in ckeys)

        if kwargs:
            raise AttributeError('Invalid keyword arguments: %s' % str(kwargs))

        ephemeral = col_kwargs.get('ephemeral', None)
        if len(Column.objects.filter(table=table, name=name,
                                     ephemeral=ephemeral)) > 0:
            raise ValueError("Column %s already in use for table %s" %
                             (name, str(table)))

        datatype = check_field_choice(cls, 'datatype', datatype)
        units = check_field_choice(cls, 'units', units)

        c = Column(table=table,
                   name=name,
                   label=label,
                   datatype=datatype,
                   units=units,
                   iskey=iskey,
                   options=options,
                   **col_kwargs)

        try:
            c.save()
        except DatabaseError as e:
            if 'no such table' in str(e):
                msg = str(e) + ' -- did you forget class Meta: proxy=True?'
                raise DatabaseError(msg)
            raise

        c.position = position or c.id
        c.save()

        return c

    def isnumeric(self):
        return self.datatype in (self.DATATYPE_FLOAT, self.DATATYPE_INTEGER,
                                 self.DATATYPE_INTEGER64)

    def istime(self):
        return self.datatype == self.DATATYPE_TIME

    def isdate(self):
        return self.datatype == self.DATATYPE_DATE

    def isstring(self):
        return self.datatype == self.DATATYPE_STRING

    def units_str(self):
        if self.units == self.UNITS_NONE:
            return None
        return field_choice_str(self, 'units', self.units)
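
Column.create() copies the class-level COLUMN_OPTIONS, splits the remaining keyword arguments between option values and model fields, and rejects duplicate column names per table. A hypothetical call is sketched below; it assumes a configured Django environment and an existing Table object `table`, and passing string labels for datatype/units is inferred from the check_field_choice() helper, which is not shown here.

# Hypothetical usage sketch (import path assumed).
from datasource.models import Column

key_col = Column.create(table, 'host', label='Host',
                        datatype=Column.DATATYPE_STRING, iskey=True)
val_col = Column.create(table, 'avg_bytes', label='Avg Bytes/s',
                        datatype='float', units='B/s')   # labels assumed OK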
Code example #13
File: models.py  Project: tagur87/steelscript-appfwk
class TableField(models.Model):
    """
    Defines a single field associated with a table.

    TableFields define the parameters that are used by a Table
    at run time.  The Table.fields attribute associates one
    or more fields with the table.

    At run time, a Criteria object binds values to each field.  The
    Criteria object has an attribute matching each associated TableField
    keyword.

    When defining a TableField, the following model attributes
    may be specified:

    :param keyword: short identifier used like a variable name, this must
        be unique per table

    :param label: text label displayed in user interfaces

    :param help_text: descriptive help text associated with this field

    :param initial: starting or default value to use in user interfaces

    :param required: boolean indicating if a non-null value must be provided

    :param hidden: boolean indicating if this field should be hidden in
        user interfaces, usually true when the value is computed from
        other fields via post_process_func or post_process_template

    :param field_cls: Django Form Field class to use for rendering.
        If not specified, this defaults to CharField

    :param field_kwargs: Dictionary of additional field specific
        kwargs to pass to the field_cls constructor.

    :param parents: List of parent keywords that this field depends on
        for a final value.  Used in conjunction with either
        post_process_func or post_process_template.

    :param pre_process_func: Function to call to perform any necessary
        preprocessing before rendering a form field or accepting
        user input.

    :param post_process_func: Function to call to perform any post
        submit processing.  This may be additional value cleanup
        or computation based on other form data.

    :param post_process_template: Simple string format style template
        to fill in based on other form criteria.
    """
    keyword = models.CharField(max_length=100)
    label = models.CharField(max_length=100, null=True, default=None)
    help_text = models.CharField(blank=True,
                                 null=True,
                                 default=None,
                                 max_length=400)
    initial = PickledObjectField(blank=True, null=True)
    required = models.BooleanField(default=False)
    hidden = models.BooleanField(default=False)

    field_cls = PickledObjectField(null=True)
    field_kwargs = PickledObjectField(blank=True, null=True)

    parent_keywords = SeparatedValuesField(null=True)

    pre_process_func = FunctionField(null=True)
    dynamic = models.BooleanField(default=False)
    post_process_func = FunctionField(null=True)
    post_process_template = models.CharField(null=True, max_length=500)

    @classmethod
    def create(cls, keyword, label=None, obj=None, **kwargs):
        parent_keywords = kwargs.pop('parent_keywords', None)
        if parent_keywords is None:
            parent_keywords = []

        field = cls(keyword=keyword, label=label, **kwargs)
        field.save()

        if field.post_process_template is not None:
            f = string.Formatter()
            for (_, parent_keyword, _,
                 _) in f.parse(field.post_process_template):
                if parent_keyword is not None:
                    parent_keywords.append(parent_keyword)

        field.parent_keywords = parent_keywords
        field.save()

        if obj is not None:
            obj.fields.add(field)
        return field

    def __unicode__(self):
        return "<TableField %s (%s)>" % (self.keyword, self.id)

    def __repr__(self):
        return unicode(self)

    def is_report_criteria(self, table):
        """ Runs through intersections of widgets to determine if this criteria
            is applicable to the passed table

            report  <-->  widgets  <-->  table
                |
                L- TableField (self)
        """
        wset = set(table.widget_set.all())
        rset = set(self.report_set.all())
        return any(
            wset.intersection(set(rwset.widget_set.all())) for rwset in rset)

    @classmethod
    def find_instance(cls, key):
        """ Return instance given a keyword. """
        params = TableField.objects.filter(keyword=key)
        if len(params) == 0:
            return None
        elif len(params) > 1:
            raise KeyError("Multiple TableField matches found for %s" % key)
        param = params[0]
        return param
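
TableField.create() derives parent_keywords from a post_process_template by running it through string.Formatter and collecting the embedded field names. That step works outside Django and can be shown standalone:

# Standalone sketch of the parent-keyword extraction in TableField.create().
import string

template = '{duration} seconds ending at {endtime}'
parents = [name for _, name, _, _ in string.Formatter().parse(template)
           if name is not None]
print(parents)   # ['duration', 'endtime']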
Code example #14
File: models.py  Project: tagur87/steelscript-appfwk
class Table(models.Model):
    name = models.CharField(max_length=200)

    # Table data is produced by a queryclassname defined within the
    # named module
    module = models.CharField(max_length=200)
    queryclassname = models.CharField(max_length=200)

    namespace = models.CharField(max_length=100)
    sourcefile = models.CharField(max_length=200)

    # list of column names
    sortcols = SeparatedValuesField(null=True)

    # list of asc/desc - must match len of sortcols
    sortdir = SeparatedValuesField(null=True)
    # Valid values for sort kwarg
    SORT_NONE = None
    SORT_ASC = 'asc'
    SORT_DESC = 'desc'

    rows = models.IntegerField(default=-1)
    filterexpr = models.CharField(null=True, max_length=400)

    # resample flag -- resample to the criteria.resolution
    # - this requires a "time" column
    resample = models.BooleanField(default=False)

    # options are typically fixed attributes defined at Table creation
    options = PickledObjectField()

    # list of fields that must be bound to values in criteria
    # that this table needs to run
    fields = models.ManyToManyField(TableField)

    # Default values for fields associated with this table; these
    # may be overridden by user criteria at run time
    criteria = PickledObjectField()

    # Function to call to tweak criteria for computing a job handle.
    # This must return a dictionary of key/value pairs that is used
    # to determine when a job must be rerun.
    criteria_handle_func = FunctionField(null=True)

    # Indicates if data can be cached
    cacheable = models.BooleanField(default=True)

    @classmethod
    def to_ref(cls, arg):
        """ Generate a table reference.

        :param arg: may be either a Table object, table id,
            or dictionary reference.

        """

        if isinstance(arg, dict):
            if 'namespace' not in arg or 'name' not in arg:
                msg = 'Invalid table ref as dict, expected namespace/name'
                raise KeyError(msg)
            return arg

        if isinstance(arg, Table):
            table = arg
        elif hasattr(arg, 'table'):
            # Datasource table
            table = arg.table
        elif isinstance(arg, int):
            table = Table.objects.get(id=arg)
        else:
            raise ValueError('No way to handle Table arg of type %s' %
                             type(arg))
        return {
            'sourcefile': table.sourcefile,
            'namespace': table.namespace,
            'name': table.name
        }

    @classmethod
    def from_ref(cls, ref):
        try:
            return Table.objects.get(sourcefile=ref['sourcefile'],
                                     namespace=ref['namespace'],
                                     name=ref['name'])
        except ObjectDoesNotExist:
            logger.exception(
                'Failed to resolve table ref: %s/%s/%s' %
                (ref['sourcefile'], ref['namespace'], ref['name']))
            raise

    def __unicode__(self):
        return "<Table %s (%s)>" % (str(self.id), self.name)

    def __repr__(self):
        return unicode(self)

    @property
    def queryclass(self):
        # Lookup the query class for the table associated with this task
        try:
            i = importlib.import_module(self.module)
            queryclass = i.__dict__[self.queryclassname]
        except:
            raise DatasourceException(
                "Could not lookup queryclass %s in module %s" %
                (self.queryclassname, self.module))

        return queryclass

    def get_columns(self, synthetic=None, ephemeral=None, iskey=None):
        """
        Return the list of columns for this table.

        `synthetic` is tri-state: None (default) is don't care,
            True means only synthetic columns, False means
            only non-synthetic columns

        `ephemeral` is a job reference.  If specified, include
            ephemeral columns related to this job

        `iskey` is tri-state: None (default) is don't care,
            True means only key columns, False means
            only non-key columns

        """

        filtered = []
        for c in Column.objects.filter(table=self).order_by(
                'position', 'name'):
            if synthetic is not None and c.synthetic != synthetic:
                continue
            if c.ephemeral is not None and c.ephemeral != ephemeral:
                continue
            if iskey is not None and c.iskey != iskey:
                continue
            filtered.append(c)

        return filtered

    def copy_columns(self,
                     table,
                     columns=None,
                     except_columns=None,
                     synthetic=None,
                     ephemeral=None):
        """ Copy the columns from `table` into this table.

        This method will copy all the columns from another table, including
        all attributes as well as sorting.

        """

        if not isinstance(table, Table):
            table = Table.from_ref(table)

        sortcols = []
        sortdir = []
        for c in table.get_columns(synthetic=synthetic, ephemeral=ephemeral):
            if columns is not None and c.name not in columns:
                continue
            if except_columns is not None and c.name in except_columns:
                continue

            if table.sortcols and (c.name in table.sortcols):
                sortcols.append(c.name)
                sortdir.append(table.sortdir[table.sortcols.index(c.name)])

            c.pk = None
            c.table = self

            c.save()

            # Allocate an id, use that as the position
            c.position = c.id
            c.save()

        if sortcols:
            self.sortcols = sortcols
            self.sortdir = sortdir
            self.save()

    def compute_synthetic(self, job, df):
        """ Compute the synthetic columns from DF a two-dimensional array
            of the non-synthetic columns.

            Synthesis occurs as follows:

            1. Compute all synthetic columns where compute_post_resample
               is False

            2. If the table is a time-based table with a defined resolution,
               the result is resampled.

            3. Any remaining columns are computed.
        """
        if df is None:
            return None

        all_columns = job.get_columns()
        all_col_names = [c.name for c in all_columns]

        def compute(df, syncols):
            for syncol in syncols:
                expr = syncol.compute_expression
                g = tokenize.generate_tokens(StringIO(expr).readline)
                newexpr = ""
                getvalue = False
                getclose = False
                for ttype, tvalue, _, _, _ in g:
                    if getvalue:
                        if ttype != tokenize.NAME:
                            msg = "Invalid syntax, expected {name}: %s" % tvalue
                            raise ValueError(msg)
                        elif tvalue in all_col_names:
                            newexpr += "df['%s']" % tvalue
                        elif tvalue in job.criteria:
                            newexpr += '"%s"' % str(job.criteria.get(tvalue))
                        else:
                            raise ValueError("Invalid variable name: %s" %
                                             tvalue)

                        getclose = True
                        getvalue = False
                    elif getclose:
                        if ttype != tokenize.OP and tvalue != "}":
                            msg = "Invalid syntax, expected {name}: %s" % tvalue
                            raise ValueError(msg)
                        getclose = False
                    elif ttype == tokenize.OP and tvalue == "{":
                        getvalue = True
                    else:
                        newexpr += tvalue
                    newexpr += ' '
                try:
                    df[syncol.name] = eval(newexpr)
                except NameError as e:
                    m = (('%s: expression failed: %s, check '
                          'APPFWK_SYNTHETIC_MODULES: %s') %
                         (self, newexpr, str(e)))
                    logger.exception(m)
                    raise TableComputeSyntheticError(m)

        # 1. Compute synthetic columns where post_resample is False
        compute(df, [
            col for col in all_columns
            if (col.synthetic and col.compute_post_resample is False)
        ])

        # 2. Resample
        colmap = {}
        timecol = None
        for col in all_columns:
            colmap[col.name] = col
            if col.istime():
                timecol = col.name

        if self.resample:
            if timecol is None:
                raise (TableComputeSyntheticError(
                    "%s: 'resample' is set but no 'time' column'" % self))

            if (('resolution' not in job.criteria)
                    and ('resample_resolution' not in job.criteria)):
                raise (TableComputeSyntheticError(
                    ("%s: 'resample' is set but criteria missing " +
                     "'resolution' or 'resample_resolution'") % self))

            how = {}
            for k in df.keys():
                if k == timecol or k not in colmap:
                    continue

                how[k] = colmap[k].resample_operation

            if 'resample_resolution' in job.criteria:
                resolution = job.criteria.resample_resolution
            else:
                resolution = job.criteria.resolution

            resolution = timedelta_total_seconds(resolution)
            if resolution < 1:
                raise (TableComputeSyntheticError(
                    ("Table %s cannot resample at a resolution " +
                     "less than 1 second") % self))

            logger.debug('%s: resampling to %ss' % (self, int(resolution)))

            indexed = df.set_index(timecol)

            resampled = indexed.resample('%ss' % int(resolution),
                                         how,
                                         convention='end').reset_index()
            df = resampled

        # 3. Compute remaining synthetic columns (post_resample is True)
        compute(df, [
            c for c in all_columns
            if (c.synthetic and c.compute_post_resample is True)
        ])

        return df
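
The core of compute_synthetic() is rewriting each compute_expression so that every {name} token becomes a reference to the matching DataFrame column (or a criteria value) before the expression is evaluated. The standalone sketch below is a simplified stand-in for the tokenize-based rewriting, using column substitution only:

# Simplified standalone sketch of the expression rewriting; the real code
# tokenizes the expression and can also substitute job criteria values.
import pandas

df = pandas.DataFrame({'in_bytes': [10, 20], 'out_bytes': [1, 2]})

expr = '{in_bytes} + {out_bytes}'
newexpr = expr.format(**{c: "df['%s']" % c for c in df.columns})
df['total_bytes'] = eval(newexpr)   # "df['in_bytes'] + df['out_bytes']"
print(df)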
Code example #15
File: models.py  Project: gwenblum/steelscript-appfwk
class Job(models.Model):

    # Timestamp when the job was created
    created = models.DateTimeField(auto_now_add=True)

    # Timestamp the last time the job was accessed
    touched = models.DateTimeField(auto_now_add=True)

    # Number of references to this job
    refcount = models.IntegerField(default=0)

    # Parent job that spawned this job (and is thus waiting for
    # this job's results)
    parent = models.ForeignKey('self', null=True, related_name='children')

    # Master job that has run (or is running) with the same
    # criteria.  If master is set, this job is a "follower"
    master = models.ForeignKey('self', null=True, related_name='followers')

    # Table associated with this job
    table = models.ForeignKey(Table)

    # Criteria used to start this job - an instance of the Criteria class
    criteria = PickledObjectField(null=True)

    # Actual criteria as returned by the job after running
    actual_criteria = PickledObjectField(null=True)

    # Unique handle for the job
    handle = models.CharField(max_length=100, default="")

    # Job status
    NEW = 0
    QUEUED = 1
    RUNNING = 2
    COMPLETE = 3
    ERROR = 4

    status = models.IntegerField(default=NEW,
                                 choices=((NEW, "New"),
                                          (QUEUED, "Queued"),
                                          (RUNNING, "Running"),
                                          (COMPLETE, "Complete"),
                                          (ERROR, "Error")))

    # Process ID for original Task thread
    pid = models.IntegerField(default=None, null=True)

    # Message if job complete or error
    message = models.TextField(default="")

    # If an error comes from a Python exception, this will contain the full
    # exception text with traceback.
    exception = models.TextField(default="")

    # Whether to update detailed progress
    update_progress = models.BooleanField(default=True)

    # Callback function
    callback = CallableField()

    # Manager class for additional .objects methods
    objects = JobManager()

    def __unicode__(self):
        return "<Job %s (%8.8s) - t%s>" % (self.id, self.handle, self.table.id)

    def __repr__(self):
        return unicode(self)

    def json(self, data=None):
        """ Return a JSON representation of this Job. """
        return {
            'id': self.id,
            'handle': self.handle,
            'progress': self.progress,
            'status': self.status,
            'message': self.message,
            'exception': self.exception,
            'data': data
        }

    @property
    def progress(self):
        progress = progressd.get(self.id, 'progress')
        logger.debug('***PROGRESS: %s: %s' % (self.id, progress))
        return int(progress)

    @property
    def is_child(self):
        return self.parent is not None

    @property
    def is_follower(self):
        return self.master is not None

    def reference(self, message=""):
        with TransactionLock(self, '%s.reference' % self):
            pk = self.pk
            Job.objects.filter(pk=pk).update(refcount=F('refcount') + 1)
        # logger.debug("%s: reference(%s) @ %d" %
        #             (self, message, Job.objects.get(pk=pk).refcount))

    def dereference(self, message=""):
        with TransactionLock(self, '%s.dereference' % self):
            pk = self.pk
            Job.objects.filter(pk=pk).update(refcount=F('refcount') - 1)
        # logger.debug("%s: dereference(%s) @ %d" %
        #             (self, message, Job.objects.get(pk=pk).refcount))

    def refresh(self):
        """ Refresh dynamic job parameters from the database. """
        # fix bug 227119, by avoiding mysql caching problems
        # http://stackoverflow.com/a/7028362
        # should be fixed in Django 1.6
        # XXXCJ -- can we drop this now?
        Job.objects.update()
        job = Job.objects.get(pk=self.pk)
        for k in [
                'status', 'message', 'exception', 'actual_criteria', 'touched',
                'refcount', 'callback', 'parent'
        ]:
            setattr(self, k, getattr(job, k))

    def safe_update(self, **kwargs):
        """ Update the job with the passed dictionary in a database safe way.

        This method updates only the requested paraemters and refreshes
        the rest from the database.  This should be used for all updates
        to Job's to ensure that unmodified keys are not accidentally
        clobbered by doing a blanket job.save().

        """
        logger.debug("%s safe_update %s" % (self, kwargs))

        with TransactionLock(self, '%s.safe_update' % str(self)):
            Job.objects.filter(pk=self.pk).update(**kwargs)
            self.refresh()

    @classmethod
    def create(cls, table, criteria, update_progress=True, parent=None):

        # Adjust the criteria for this specific table, locking
        # down start/end times as needed
        criteria = criteria.build_for_table(table)
        try:
            criteria.compute_times()
        except ValueError:
            # Ignore errors, this table may not have start/end times
            pass

        # Compute the handle -- this will take into account
        # cacheability
        handle = Job._compute_handle(table, criteria)

        # Grab a lock on the row associated with the table
        with TransactionLock(table, "Job.create"):
            # Look for another job by the same handle in any state except ERROR
            master = Job.objects.get_master(handle)

            job = Job(table=table,
                      criteria=criteria,
                      actual_criteria=None,
                      status=Job.NEW,
                      pid=os.getpid(),
                      handle=handle,
                      parent=parent,
                      master=master,
                      update_progress=update_progress,
                      message='',
                      exception='')
            job.save()

            if master:
                master.reference("Master link from job %s" % job)
                now = datetime.datetime.now(tz=pytz.utc)
                master.safe_update(touched=now)

                logger.info("%s: New job for table %s, linked to master %s" %
                            (job, table.name, master))
            else:
                logger.info("%s: New job for table %s" % (job, table.name))

            # Create new instance in progressd as part of same Transaction
            p = {
                'job_id': job.id,
                'status': job.status,
                'progress': 0,
                'master_id': job.master.id if job.master else 0,
                'parent_id': job.parent.id if job.parent else 0
            }
            logger.debug('***Creating Job to progressd: %s' % p)
            progressd.post(**p)

            # End of TransactionLock

        logger.debug("%s: criteria = %s" % (job, criteria))

        return job

    def start(self, method=None, method_args=None):
        """ Start this job. """

        with TransactionLock(self.table, '%s.start' % self):
            logger.info("%s: Job starting" % self)
            self.refresh()

            if self.is_follower:
                logger.debug("%s: Shadowing master job %s" %
                             (self, self.master))
                if self.master.status == Job.COMPLETE:
                    self.mark_complete()
                elif self.master.status == Job.ERROR:
                    self.mark_error(self.master.message, self.master.exception)

                return

        if method is None:
            method = self.table.queryclass.run

        # Create a task to do the work
        task = Task(self, Callable(method, method_args))
        logger.debug("%s: Created task %s" % (self, task))
        task.start()

    def schedule(self, jobs, callback):
        jobid_map = {}
        for name, job in jobs.iteritems():
            jobid_map[name] = job.id

        logger.debug("%s: Setting callback %s" % (self, callback))
        self.safe_update(callback=Callable(callback))
        logger.debug("%s: Done setting callback %s" % (self, self.callback))

        for name, job in jobs.iteritems():
            job.start()

    def check_children(self, objlock=None):
        # get a lock on the child that's called us to ensure any status
        # from its transaction will be seen.
        if objlock is None:
            objlock = self

        with TransactionLock(objlock, '%s.checking_children' % self):
            running_children = Job.objects.filter(
                parent=self, status__in=[Job.NEW, Job.RUNNING])

        logger.info("%s: %d running children" % (self, len(running_children)))
        logger.debug(
            "%s: all children: %s" %
            (self, ';'.join('%s - %s' % (j.status, j)
                            for j in Job.objects.filter(parent=self))))

        if len(running_children) > 0:
            # Not done yet, do nothing
            return

        # Grab a lock on this job to make sure only one caller
        # gets the callback
        with TransactionLock(self, '%s.check_children' % self):
            # Now that we have the lock, make sure we have latest Job
            # details
            self.refresh()

            logger.info("%s: checking callback %s" % (self, self.callback))
            if self.callback is None:
                # Some other child got to it first
                return

            # Save off the callback, we'll call it outside the transaction
            callback = self.callback

            # Clear the callback while still in lockdown
            self.callback = None
            self.save()

        t = Task(self, callback=callback)
        logger.info("%s: Created callback task %s" % (self, t))
        t.start()

    def done(self):
        self.status = int(progressd.get(self.id, 'status'))
        if self.status in (Job.COMPLETE, Job.ERROR):
            self.refresh()

        return self.status in (Job.COMPLETE, Job.ERROR)

    def mark_progress(self, progress, status=None):
        if status is None:
            status = Job.RUNNING
        logger.debug('***SAVING PROGRESS for %s: %s/%s' %
                     (self.id, status, progress))
        progress = int(float(progress))
        try:
            progressd.put(self.id, status=status, progress=progress)
        except RvbdHTTPException as e:
            logger.debug('***Error saving progress for %s: %s' % (self.id, e))

    def mark_done(self, status, **kwargs):
        with TransactionLock(self, '%s.mark_done' % self):
            self.refresh()
            old_status = self.status
            if old_status in (Job.COMPLETE, Job.ERROR):
                # Status was already set to a done state, avoid
                # double action and return now
                return
            self.status = status
            for k, v in kwargs.iteritems():
                setattr(self, k, v)
            self.save()

        # On status change, do more...
        self.mark_progress(status=status, progress=100)

        if not self.is_follower:
            # Notify followers of this job
            followers = Job.objects.filter(master=self)
            for follower in followers:
                if self.status == Job.COMPLETE:
                    kwargs['actual_criteria'] = self.actual_criteria
                    follower.mark_complete(status=status, **kwargs)

                elif self.status == Job.ERROR:
                    follower.mark_done(status=status, **kwargs)

        if self.parent:
            logger.info("%s: Asking parent %s to check children" %
                        (self, self.parent))
            t = Task(self.parent,
                     callback=Callable(self.parent.check_children,
                                       called_kwargs={'objlock': self}),
                     generic=True)
            logger.info("%s: Created check_children task %s" % (self, t))
            t.start()

        return True

    def mark_complete(self, data=None, **kwargs):
        logger.info("%s: complete" % self)
        if data is not None:
            self._save_data(data)

        kwargs['status'] = Job.COMPLETE
        kwargs['message'] = ''

        if (self.actual_criteria is None and 'actual_criteria' not in kwargs):
            kwargs['actual_criteria'] = self.criteria

        self.mark_done(**kwargs)
        logger.info("%s: saved as COMPLETE" % self)

        # Send signal for possible Triggers
        post_data_save.send(sender=self, data=self.data, context={'job': self})

    def mark_error(self, message, exception=None):
        if exception is None:
            exception = ''

        logger.warning("%s failed: %s" % (self, message))

        self.mark_done(status=Job.ERROR, message=message, exception=exception)
        logger.info("%s: saved as ERROR" % self)

        # Send signal for possible Triggers
        error_signal.send(sender=self, context={'job': self})

    @classmethod
    def _compute_handle(cls, table, criteria):
        h = hashlib.md5()
        h.update(str(table.id))

        if table.cacheable and not criteria.ignore_cache:
            # XXXCJ - Drop ephemeral columns when computing the cache handle,
            # since the list of columns is modified at run time.  Typical use
            # case is an analysis table which creates a time-series graph of
            # the top 10 hosts -- one column per host.  The host columns will
            # change based on the run of the dependent table.
            #
            # Including ephemeral columns causes some problems because the
            # handle is computed before the query is actually run, so it never
            # matches.
            #
            # May want to dig in to this further and make sure this doesn't
            # pick up cache files when we don't want it to
            h.update('.'.join([c.name for c in table.get_columns()]))

            if table.criteria_handle_func:
                criteria = table.criteria_handle_func(criteria)

            for k, v in criteria.iteritems():
                # logger.debug("Updating hash from %s -> %s" % (k,v))
                h.update('%s:%s' % (k, v))
        else:
            # Table is not cacheable (or the cache is being ignored), so use
            # the current time plus a random value just to get a unique hash
            h.update(str(datetime.datetime.now()))
            h.update(str(random.randint(0, 10000000)))

        return h.hexdigest()
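
    # Illustrative sketch (not part of the original source): for a cacheable
    # table the handle is a pure function of the table id, the non-ephemeral
    # column names and the (optionally pre-processed) criteria, so two jobs
    # with identical criteria share a cache file:
    #
    #     h1 = Job._compute_handle(table, criteria)
    #     h2 = Job._compute_handle(table, criteria)
    #     assert h1 == h2      # same handle -> same cache file
    #
    # A non-cacheable table (or criteria.ignore_cache) mixes in the current
    # time and a random number, so every call yields a fresh handle.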

    def get_columns(self, ephemeral=None, **kwargs):
        """ Return columns assocated with the table for the job.

        The returned column set includes ephemeral columns associated
        with this job unless ephemeral is set to False.

        """
        if ephemeral is None:
            kwargs['ephemeral'] = self.master or self
        return self.table.get_columns(**kwargs)

    def _save_data(self, data):
        if isinstance(data, list) and len(data) > 0:
            # Convert the result to a dataframe
            columns = [col.name for col in self.get_columns(synthetic=False)]
            df = pandas.DataFrame(data, columns=columns)
        elif ((data is None) or (isinstance(data, list) and len(data) == 0)):
            df = None
        elif isinstance(data, pandas.DataFrame):
            df = data
        else:
            raise ValueError("Unrecognized query result type: %s" % type(data))

        if df is not None:
            self.check_columns(df)
            df = self.normalize_types(df)
            df = self.table.compute_synthetic(self, df)

            # Sort according to the defined sort columns
            if self.table.sortcols:
                sorted_df = df.sort_values(self.table.sortcols,
                                           ascending=[
                                               b == Table.SORT_ASC
                                               for b in self.table.sortdir
                                           ])
                # Move NaN rows of the first sortcol to the end
                n = self.table.sortcols[0]
                df = (sorted_df[sorted_df[n].notnull()].append(
                    sorted_df[sorted_df[n].isnull()]))

            if self.table.rows > 0:
                df = df[:self.table.rows]

        if df is not None:
            df.to_pickle(self.datafile())

            logger.debug("%s data saved to file: %s" %
                         (str(self), self.datafile()))
        else:
            logger.debug("%s no data saved, data is empty" % (str(self)))

        return df
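
    # Illustrative sketch (column names are hypothetical): _save_data accepts
    # either a pandas.DataFrame or a list of rows ordered like the
    # non-synthetic columns, e.g. for columns ('host', 'avg_bytes'):
    #
    #     job._save_data([['10.0.0.1', 1200.0],
    #                     ['10.0.0.2', 800.0]])
    #
    # The rows are wrapped in a DataFrame, type-checked and normalized,
    # extended with synthetic columns, sorted, truncated to table.rows and
    # finally pickled to job.datafile().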

    def datafile(self):
        """ Return the data file for this job. """
        return os.path.join(settings.DATA_CACHE, "job-%s.data" % self.handle)

    def data(self):
        """ Returns a pandas.DataFrame of data, or None if not available. """

        if not self.done():
            logger.warning("%s: job not complete, no data available" % self)
            raise DataError("Job not complete, no data available")

        self.reference("data()")

        e = None
        try:
            logger.debug("%s looking for data file: %s" %
                         (str(self), self.datafile()))
            if os.path.exists(self.datafile()):
                df = pandas.read_pickle(self.datafile())
                logger.debug("%s data loaded %d rows from file: %s" %
                             (str(self), len(df), self.datafile()))
            else:
                logger.debug("%s no data, missing data file: %s" %
                             (str(self), self.datafile()))
                df = None
        except Exception as e:
            logger.error("Error loading datafile %s for %s" %
                         (self.datafile(), str(self)))
            logger.error("Traceback:\n%s" % e)
        finally:
            self.dereference("data()")

        if e:
            raise e

        return df
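
    # Typical caller-side sketch (hypothetical job instance, not from the
    # original source):
    #
    #     if job.done():
    #         df = job.data()     # pandas.DataFrame, or None if no data file
    #
    # data() raises DataError when the job has not finished; the
    # reference()/dereference() pair brackets the read, presumably so the
    # cached data file is not reaped while it is being loaded.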

    def values(self):
        """ Return data as a list of lists. """

        df = self.data()
        if df is not None:
            # Replace NaN with None
            df = df.where(pandas.notnull(df), None)

            # Extract the values in the right order
            all_columns = self.get_columns()
            all_col_names = [c.name for c in all_columns]

            # Straggling numpy data types may cause problems
            # downstream (json encoding, for example), so strip
            # things down to just native ints and floats
            vals = []
            for row in df.ix[:, all_col_names].itertuples():
                vals_row = []
                for v in row[1:]:
                    if (isinstance(v, numpy.number)
                            or isinstance(v, numpy.bool_)):
                        v = numpy.asscalar(v)
                    vals_row.append(v)
                vals.append(vals_row)

        else:
            vals = []
        return vals
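
    # Sketch of the difference between data() and values() (hypothetical
    # column layout):
    #
    #     job.data()    ->  DataFrame with columns ['host', 'avg_bytes']
    #     job.values()  ->  [['10.0.0.1', 1200.0], ['10.0.0.2', 800.0]]
    #
    # values() replaces NaN with None and converts numpy scalars to native
    # Python types so the rows can be JSON-encoded directly.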

    def check_columns(self, df):
        for col in self.get_columns(synthetic=False):
            if col.name not in df:
                raise ValueError('Returned table missing expected column: %s' %
                                 col.name)

    def normalize_types(self, df):
        for col in self.get_columns(synthetic=False):
            s = df[col.name]
            if col.istime():
                # The column is supposed to be time,
                # make sure all values are datetime objects
                if str(s.dtype).startswith(str(pandas.np.dtype('datetime64'))):
                    # Already a datetime
                    pass
                elif str(s.dtype).startswith('int'):
                    # Assume this is a numeric epoch, convert to datetime
                    df[col.name] = s.astype('datetime64[s]')
                elif str(s.dtype).startswith('float'):
                    # This is a numeric epoch as a float, possibly
                    # has subsecond resolution, convert to
                    # datetime but preserve up to millisecond
                    df[col.name] = (1000 * s).astype('datetime64[ms]')
                else:
                    # Possibly datetime object or a datetime string,
                    # hopefully astype() can figure it out
                    df[col.name] = s.astype('datetime64[ms]')

                # Make sure we are UTC.  Older pandas raised an error when
                # unpickling pytz timezones, which required the internal
                # tzutc (https://github.com/pydata/pandas/issues/6871); the
                # problem appears solved with recent pandas, so pytz.utc is
                # used here.
                utc = pytz.utc
                try:
                    df[col.name] = df[col.name].apply(
                        lambda x: x.tz_localize(utc))
                except TypeError as e:
                    if e.message.startswith('Cannot localize'):
                        df[col.name] = df[col.name].apply(
                            lambda x: x.tz_convert(utc))
                    else:
                        raise
            elif col.isdate():
                if str(s.dtype).startswith(str(pandas.np.dtype('datetime64'))):
                    # Already a datetime
                    pass
                elif str(s.dtype).startswith('int'):
                    # Assume this is a numeric epoch, convert to datetime
                    df[col.name] = s.astype('datetime64[s]')
                elif str(s.dtype).startswith('float'):
                    # This is a numeric epoch as a float, possibly
                    # has subsecond resolution, convert to
                    # datetime but preserve up to millisecond
                    df[col.name] = (1000 * s).astype('datetime64[ms]')
                elif str(s.dtype).startswith('object'):
                    # This is likely a datetime.date object; convert it to
                    # datetime64[ns].  Converting to datetime64[s] by
                    # setting unit='s' failed, but datetime64[ns] is
                    # accepted downstream.
                    df[col.name] = pandas.to_datetime(s)
                else:
                    # Possibly datetime object or a datetime string,
                    # hopefully astype() can figure it out
                    df[col.name] = s.astype('datetime64[ms]')
            elif (col.isnumeric() and s.dtype == pandas.np.dtype('object')):
                # The column is supposed to be numeric but apparently
                # contains some strings.  Try replacing empty strings with
                # NaN and see if it converts to float64
                try:
                    df[col.name] = (s.replace('', pandas.np.NaN).astype(
                        pandas.np.float64))
                except ValueError:
                    # This may incorrectly be tagged as numeric
                    pass

        return df
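
    # Illustrative sketch of the epoch-to-datetime normalization (standalone,
    # assumed example data):
    #
    #     s = pandas.Series([1420070400, 1420074000])    # integer epochs
    #     s.astype('datetime64[s]')
    #     # -> 2015-01-01 00:00:00, 2015-01-01 01:00:00
    #
    # Float epochs are multiplied by 1000 and converted as 'datetime64[ms]'
    # so sub-second resolution is preserved up to the millisecond.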

    def combine_filterexprs(self, joinstr="and", exprs=None):
        self.refresh()

        if exprs is None:
            exprs = []
        elif type(exprs) is not list:
            exprs = [exprs]

        exprs.append(self.table.filterexpr)

        nonnull_exprs = []
        for e in exprs:
            if e != "" and e is not None:
                nonnull_exprs.append(e)

        if len(nonnull_exprs) > 1:
            return "(" + (") " + joinstr + " (").join(nonnull_exprs) + ")"
        elif len(nonnull_exprs) == 1:
            return nonnull_exprs[0]
        else:
            return ""