class BinaryGrammar(Grammar):
	"""A grammar that builds rowdicts from binary data.

	The grammar expects the input to be in fixed-length records.
	The actual specification of the fields is done via a binaryRecordDef
	element.
	"""
	name_ = "binaryGrammar"
	rowIterator = BinaryRowIterator

	# Leading bytes to discard (e.g., a fixed-size file header) before
	# record parsing starts.
	_til = base.IntAttribute("skipBytes", 
		default=0, 
		description="Number of bytes to skip before parsing records.")
	
	# The record layout, given as a binaryRecordDef child element.
	_fdefs = base.StructAttribute("fieldDefs",
		description="Definition of the record.",
		childFactory=BinaryRecordDef)

	# Optional record framing.  "fortran" expects a 4-byte length word
	# before and after each payload, as written by fortran unformatted
	# output; None means records follow each other back-to-back.
	_armoring = base.EnumeratedUnicodeAttribute("armor",
		default=None,
		validValues=["fortran"],
		description="Record armoring; by default it's None meaning the"
			" data was dumped to the file sequentially.  Set it to fortran"
			" for fortran unformatted files (4 byte length before and after"
			" the payload).")
class REGrammar(Grammar, FileRowAttributes):
    """A grammar that builds rowdicts from records and fields specified
    via REs separating them.

    There is also a simple facility for "cleaning up" records.  This can be
    used to remove standard shell-like comments; use
    ``recordCleaner="(?:#.*)?(.*)"``.
    """
    name_ = "reGrammar"

    rowIterator = REIterator

    # Lines discarded at the top of every source file (headers and such).
    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.",
        copyable=True)
    # Early-termination pattern; parsing of a source stops at the first
    # record matching this.
    # (fixed: the description was missing its closing parenthesis)
    _stopPat = REAttribute(
        "stopPat",
        default=None,
        description="Stop parsing when a record *matches* this RE (this"
        " is for skipping non-data footers)",
        copyable=True)
    # By default, one record per line.
    _recordSep = REAttribute(
        "recordSep",
        default=re.compile("\n"),
        description="RE for separating two records in the source.",
        copyable=True)
    # By default, fields are whitespace-separated.
    _fieldSep = REAttribute(
        "fieldSep",
        default=re.compile(r"\s+"),
        description="RE for separating two fields in a record.",
        copyable=True)
    # Inter-record material (comments) to drop before record splitting.
    _commentPat = REAttribute(
        "commentPat",
        default=None,
        description="RE inter-record material to be ignored (note: make this"
        " match the entire comment, or you'll get random mess from partly-matched"
        " comments.  Use '(?m)^#.*$' for beginning-of-line hash-comments.",
        copyable=True)
    # Per-record rewrite: the groups matched by this RE, joined by blanks,
    # replace the record; non-matching records are rejected.
    _recordCleaner = REAttribute(
        "recordCleaner",
        default=None,
        description="A regular expression matched against each record."
        " The matched groups in this RE are joined by blanks and used"
        " as the new pattern.  This can be used for simple cleaning jobs;"
        " However, records not matching recordCleaner are rejected.",
        copyable=True)
    # Keys for the rowdict, positionally matched against the split fields.
    _names = base.StringListAttribute(
        "names",
        description="Names for the parsed fields, in matching sequence.  You can"
        r" use macros here, e.g., \\colNames{someTable}.",
        expand=True,
        copyable=True)
    # If True, a mismatch between field count and name count is tolerated.
    _lax = base.BooleanAttribute(
        "lax",
        description="allow more or less"
        " fields in source records than there are names",
        default=False,
        copyable=True)
# Beispiel #3
# 0
class FITSTableGrammar(common.Grammar):
    """A grammar parsing from FITS tables.

    fitsTableGrammar results in typed records, i.e., values normally come
    in the types they are supposed to have.  Of course, that won't work
    for datetimes, STC-S regions, and the like.

    The keys of the result dictionaries are simply the names given in
    the FITS.
    """
    name_ = "fitsTableGrammar"

    # Which HDU to read the table from; FITS tabular data normally lives
    # in extension 1, hence the default.
    _hduIndex = base.IntAttribute(
        "hdu",
        default=1,
        description="Take the data from this extension (primary=0)."
        " Tabular data typically resides in the first extension.")

    rowIterator = FITSTableIterator
class CSVGrammar(Grammar, FileRowAttributes):
    """A grammar that uses python's csv module to parse files.

    Note that these grammars by default interpret the first line of
    the input file as the column names.  When your files don't follow
    that convention, you *must* give names (as in ``names='raj2000,
    dej2000, magV'``), or you'll lose the first line and have silly
    column names.

    CSVGrammars currently do not support non-ASCII inputs.
    Contact the authors if you need that.
    """
    name_ = "csvGrammar"

    # Field separator handed through to the csv module.
    _delimiter = base.UnicodeAttribute("delimiter",
                                       description="CSV delimiter",
                                       default=",",
                                       copyable=True)

    # Rowdict keys; when None (the default), the first CSV line supplies
    # the names.
    _names = base.StringListAttribute(
        "names",
        default=None,
        description="Names for the parsed fields, in sequence of the"
        " comma separated values.  The default is to read the field names"
        " from the first line of the csv file.  You can use macros here,"
        r" e.g., \\colNames{someTable}.",
        expand=True,
        copyable=True)

    # Maps to the csv module's skipinitialspace behavior.
    _strip = base.BooleanAttribute(
        "strip",
        default=False,
        description="If True, whitespace immediately following a delimiter"
        " is ignored.",
        copyable=True)

    # NOTE(review): unlike REGrammar and ColumnGrammar, topIgnoredLines
    # here is not copyable=True -- confirm whether that is intentional.
    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.")

    rowIterator = CSVIterator
class InputKey(column.ParamBase):
    """A description of a piece of input.

    Think of inputKeys as abstractions for input fields in forms, though
    they are used for services not actually exposing HTML forms as well.

    Some of the DDL-type attributes (e.g., references) only make sense here
    if columns are being defined from the InputKey.

    You can give a "defaultForForm" property on inputKeys to supply
    a string literal default that will be pre-filled in the form
    renderer and its friends but not for other renderers (like S*AP).

    Properties evaluated:

    * defaultForForm -- a value entered into form fields by default
      (be stingy with those; while it's nice to not have to set things
      presumably right for almost everyone, having to delete stuff
      you don't want over and over is really annoying).
    * adaptToRenderer -- a true boolean literal here causes the param
      to be adapted for the renderer (e.g., float could become vizierexpr-float).
      You'll usually not want this, because the expressions are
      generally evaluated by the database, and the condDescs do the
      adaptation themselves.  This is mainly for rare situations like
      file uploads in custom cores.
    """
    name_ = "inputKey"

    # XXX TODO: make widgetFactory and showItems properties.
    _widgetFactory = base.UnicodeAttribute(
        "widgetFactory",
        default=None,
        description="A python expression for a custom widget"
        " factory for this input,"
        " e.g., 'Hidden' or 'widgetFactory(TextArea, rows=15, cols=30)'",
        copyable=True)
    _showItems = base.IntAttribute(
        "showItems",
        default=3,
        description="Number of items to show at one time on selection widgets.",
        copyable=True)
    _inputUnit = base.UnicodeAttribute(
        "inputUnit",
        default=None,
        description="Override unit of the table column with this.",
        copyable=True)
    _std = base.BooleanAttribute(
        "std",
        default=False,
        description="Is this input key part of a standard interface for"
        " registry purposes?",
        copyable=True)
    _multiplicity = base.UnicodeAttribute(
        "multiplicity",
        default=None,
        copyable=True,
        description="Set"
        " this to single to have an atomic value (chosen at random"
        " if multiple input values are given),"
        " forced-single to have an atomic value"
        " and raise an exception if multiple values come in, or"
        " multiple to receive lists.  On the form renderer, this is"
        " ignored, and the values are what nevow formal passes in."
        " If not given, it is single unless there is a values element with"
        " options, in which case it's multiple.")

    # Don't validate meta for these -- while they are children
    # of validated structures (services), they don't need any
    # meta at all.  This should go as soon as we have a sane
    # inheritance hierarchy for tables.
    metaModel = None

    def completeElement(self, ctx):
        """enforces restricted mode and delegates to the superclass hook."""
        self._completeElementNext(InputKey, ctx)
        # widgetFactory is arbitrary python and hence forbidden in
        # restricted (untrusted-RD) mode.
        if self.restrictedMode and self.widgetFactory:
            raise base.RestrictedElement("widgetFactory")

    def onElementComplete(self):
        """fixes up unit scaling and the multiplicity default."""
        self._onElementCompleteNext(InputKey)
        # compute scaling if an input unit is given
        self.scaling = None
        if self.inputUnit:
            self.scaling = base.computeConversionFactor(
                self.inputUnit, self.unit)

        if self.multiplicity is None:
            self.multiplicity = "single"
            if self.isEnumerated():
                # these almost always want lists returned.
                self.multiplicity = "multiple"

    def onParentComplete(self):
        """inherits requiredness from condDesc parents, unless defaulted."""
        if self.parent and hasattr(self.parent, "required"):
            # children of condDescs inherit their requiredness
            # (unless defaulted)
            self.required = self.parent.required
        # but if there's a default, never require an input
        if self.value:
            self.required = False

    def validateValue(self, literal):
        """raises a ValidationError if literal cannot be deserialised into
        an acceptable value for self.
        """
        self._parse(literal)

    def _getVOTableType(self):
        """returns the VOTable type for the param.

        The reason this is overridden is that historically, we've been
        cavalier about letting in multiple values for a single param
        (as in enumerated values and such).

        It's probably too late to fix this now, so for InputKeys with
        multiplicity multiple we're allowing arrays, too.
        """
        type, arraysize, xtype = column.ParamBase._getVOTableType(self)

        if self.multiplicity == "multiple" and arraysize == '1':
            arraysize = "*"

        return type, arraysize, xtype

    @classmethod
    def fromColumn(cls, column, **kwargs):
        """returns an InputKey for query input to column.

        kwargs are fed to the new InputKey as attribute literals; when
        column already is an InputKey, a changed copy (or column itself)
        is returned instead.
        """
        if isinstance(column, InputKey):
            if kwargs:
                return column.change(**kwargs)
            else:
                return column

        instance = cls(None)
        instance.feedObject("original", column)

        if column.isEnumerated():
            instance.feedObject("multiplicity", "multiple")

        for k, v in kwargs.iteritems():
            instance.feed(k, v)
        # query inputs are optional by default, even for required columns
        # (fixed: "x not in y" idiom instead of "not x in y")
        if "required" not in kwargs:
            instance.feedObject("required", False)
        return instance.finishElement(None)
class DirectGrammar(base.Structure, base.RestrictionMixin):
    """A user-defined external grammar.

    See the separate document on user-defined code on more on direct grammars.

    Also note the program gavomkboost that can help you generate code for
    the C boosters used by direct grammars.
    """
    name_ = "directGrammar"

    # Path of the booster's C source, relative to the resource directory.
    _cbooster = rscdef.ResdirRelativeAttribute(
        "cBooster",
        default=base.Undefined,
        description="resdir-relative path to the booster C source.",
        copyable=True)

    _gzippedInput = base.BooleanAttribute(
        "gzippedInput",
        default=False,
        description="Pipe gzip before booster? (will not work for FITS)",
        copyable=True)

    _autoNull = base.UnicodeAttribute(
        "autoNull",
        default=None,
        description="Use this string as general NULL value (when reading"
        " from plain text).",
        copyable=True)

    _ignoreBadRecords = base.BooleanAttribute(
        "ignoreBadRecords",
        default=False,
        description="Let booster ignore invalid records?",
        copyable=True)

    _recordSize = base.IntAttribute(
        "recordSize",
        default=4000,
        description="For bin boosters, read this many bytes to make"
        " up a record; for line-based boosters, this is the maximum"
        " length of an input line.",
        copyable=True)

    _preFilter = base.UnicodeAttribute(
        "preFilter",
        default=None,
        description="Pipe input through this program before handing it to"
        " the booster; this string is shell-expanded (will not work for FITS).",
        copyable=True)

    _customFlags = base.UnicodeAttribute(
        "customFlags",
        default="",
        description="Pass these flags to the C compiler when building the"
        " booster.",
        copyable=True)

    # Which kind of booster source code gets generated.
    # (fixed: description read "for from FITS binary tables")
    _type = base.EnumeratedUnicodeAttribute(
        "type",
        default="col",
        validValues=["col", "bin", "fits", "split"],
        description="Make code for a booster parsing by column indices (col),"
        " by splitting along separators (split), by reading fixed-length"
        " binary records (bin), or from FITS binary tables (fits).",
        copyable=True)

    _splitChar = base.UnicodeAttribute(
        "splitChar",
        default="|",
        description="For split boosters, use this as the separator.",
        copyable=True)

    _ext = base.IntAttribute(
        "extension",
        default=1,
        description=
        "For FITS table boosters, get the table from this extension.",
        copyable=True)

    _mapKeys = base.StructAttribute(
        "mapKeys",
        childFactory=common.MapKeys,
        default=None,
        copyable=True,
        description="For a FITS booster, map DB table column names"
        " to FITS column names (e.g., if the FITS table name flx is to"
        " end up in the DB column flux, say flux:flx).")

    _rd = rscdef.RDAttribute()

    isDispatching = False

    def validate(self):
        """checks attribute combinations that only make sense together."""
        self._validateNext(DirectGrammar)
        if self.type == 'bin':
            if not self.recordSize:
                raise base.StructureError(
                    "DirectGrammars reading from binary need"
                    " a recordSize attribute")
        if self.mapKeys is not None:
            if self.type != "fits":
                raise base.StructureError("mapKeys is only allowed for FITS"
                                          " boosters.")

    def onElementComplete(self):
        """materializes keyMap for FITS boosters (empty dict if no mapKeys)."""
        if self.type == "fits":
            if self.mapKeys:
                self.keyMap = self.mapKeys.maps
            else:
                self.keyMap = {}

    def getBooster(self):
        """returns a CBooster built from this grammar's attributes."""
        return CBooster(self.cBooster,
                        self.parent,
                        gzippedInput=self.gzippedInput,
                        preFilter=self.preFilter,
                        autoNull=self.autoNull,
                        ignoreBadRecords=self.ignoreBadRecords,
                        customFlags=self.customFlags)

    def parse(self, sourceToken, targetData=None):
        """returns a callable that copies booster output for sourceToken
        into the data's single table.

        Raises a StructureError when the embedding data makes more than
        one table, since a booster can only feed one.
        """
        booster = self.getBooster()
        makes = self.parent.makes
        if len(makes) != 1:
            raise base.StructureError(
                "Directgrammar only works for data having"
                " exactly one table, but data '%s' has %d" %
                (self.parent.id, len(makes)))

        def copyIn(data):
            data.tables.values()[0].copyIn(booster.getOutput(sourceToken))
            if booster.getStatus():
                raise base.SourceParseError("Booster returned error signature",
                                            source=sourceToken)

        return copyIn
# Beispiel #7
# 0
class ColumnGrammar(Grammar, FileRowAttributes):
    """A grammar that builds rowdicts out of character index ranges.

    This works by using the colRanges attribute like <col key="mag">12-16</col>,
    which will take the characters 12 through 16 inclusive from each input
    line to build the input column mag.

    As a shortcut, you can also use the colDefs attribute; it contains
    a string of the form {<key>:<range>}, i.e.,
    a whitespace-separated list of colon-separated items of key and range
    as accepted by cols, e.g.::

        <colDefs>
            a: 3-4
            _u: 7
        </colDefs>
    """
    name_ = "columnGrammar"

    # Lines discarded at the top of every source file.
    _til = base.IntAttribute(
        "topIgnoredLines",
        default=0,
        description="Skip this many lines at the top of each source file.",
        copyable=True)
    # Explicit key -> character-range mapping (the long form).
    _cols = base.DictAttribute("colRanges",
                               description="Mapping of"
                               " source keys to column ranges.",
                               itemAttD=ColRangeAttribute("col"),
                               copyable=True)
    # Shortcut form; parsing it feeds colRanges via _parseColDefs.
    _colDefs = base.ActionAttribute("colDefs",
                                    description="Shortcut"
                                    " way of defining cols",
                                    methodName="_parseColDefs")
    _commentIntroducer = base.UnicodeAttribute(
        "commentIntroducer",
        default=base.NotGiven,
        description="A character sequence"
        " that, when found at the beginning of a line makes this line"
        " ignored",
        copyable=True)

    def _getColDefGrammar(self):
        """returns a pyparsing grammar for the colDefs shortcut syntax
        (whitespace-separated key:range pairs).
        """
        with utils.pyparsingWhitechars("\n\t\r "):
            intLiteral = pyparsing.Word(pyparsing.nums)
            # need to manually swallow whitespace after literals
            blindWhite = pyparsing.Suppress(
                pyparsing.Optional(pyparsing.White()))
            dash = blindWhite + pyparsing.Literal("-") + blindWhite

            # NOTE(review): "range" shadows the builtin within this method;
            # harmless here since the builtin is not used, but renaming
            # would be cleaner.
            # a range is "-N" (from start), "N" (single column), "N-" (to
            # end), or "N-M".
            range = pyparsing.Combine(
                dash + blindWhite + intLiteral
                | intLiteral +
                pyparsing.Optional(dash + pyparsing.Optional(intLiteral)))
            range.setName("Column range")

            # identifierPattern ends with an anchor; strip it so the regex
            # can match inside a larger string.
            identifier = pyparsing.Regex(utils.identifierPattern.pattern[:-1])
            identifier.setName("Column key")

            # each clause parses to a (key, range) tuple; t[1] is the
            # suppressed-colon literal, hence t[0]/t[2].
            clause = (identifier + pyparsing.Literal(":") + blindWhite +
                      range).addParseAction(lambda s, p, t: (t[0], t[2]))
            colDefs = pyparsing.ZeroOrMore(clause) + pyparsing.StringEnd()
            # range.setDebug(True);identifier.setDebug(True);clause.setDebug(True)
            return colDefs

    def _parseColDefs(self, ctx):
        # the handler for colDefs -- parse shortcut colDefs and merge the
        # result into colRanges; raises LiteralParseError with a usage hint
        # on bad input.
        try:
            for key, range in utils.pyparseString(self._getColDefGrammar(),
                                                  self.colDefs):
                self.colRanges[key] = self._cols.itemAttD.parse(range)
        except pyparsing.ParseException, ex:
            raise base.LiteralParseError(
                "colDefs",
                self.colDefs,
                hint="colDefs is a whitespace-separated list of key:range pairs."
                " Your literal doesn't look like this, and here's what the"
                " parser had to complain: %s" % ex)
# Beispiel #8
# 0
class ColumnBase(base.Structure, base.MetaMixin):
    """A base class for columns, parameters, output fields, etc.

    Actually, right now there's far too much cruft in here that
    should go into Column proper or still somewhere else.  Hence:
    XXX TODO: Refactor.

    See also Column for a docstring that still applies to all we've in
    here.
    """
    _name = ParamNameAttribute("name",
                               default=base.Undefined,
                               description="Name of the param",
                               copyable=True,
                               before="type")
    _type = TypeNameAttribute(
        "type",
        default="real",
        description="datatype for the column (SQL-like type system)",
        copyable=True,
        before="unit")
    _unit = base.UnicodeAttribute("unit",
                                  default="",
                                  description="Unit of the values",
                                  copyable=True,
                                  before="ucd",
                                  strip=True)
    _ucd = base.UnicodeAttribute("ucd",
                                 default="",
                                 description="UCD of the column",
                                 copyable=True,
                                 before="description")
    _description = base.NWUnicodeAttribute(
        "description",
        default="",
        copyable=True,
        description=
        "A short (one-line) description of the values in this column.")
    _tablehead = base.UnicodeAttribute(
        "tablehead",
        default=None,
        description="Terse phrase to put into table headers for this"
        " column",
        copyable=True)
    _utype = base.UnicodeAttribute("utype",
                                   default=None,
                                   description="utype for this column",
                                   copyable=True)
    _required = base.BooleanAttribute(
        "required",
        default=False,
        description="Record becomes invalid when this column is NULL",
        copyable=True)
    _displayHint = DisplayHintAttribute(
        "displayHint",
        description="Suggested presentation; the format is "
        " <kw>=<value>{,<kw>=<value>}, where what is interpreted depends"
        " on the output format.  See, e.g., documentation on HTML renderers"
        " and the formatter child of outputFields.",
        copyable=True)
    _verbLevel = base.IntAttribute(
        "verbLevel",
        default=20,
        description="Minimal verbosity level at which to include this column",
        copyable=True)
    _values = base.StructAttribute("values",
                                   default=None,
                                   childFactory=Values,
                                   description="Specification of legal values",
                                   copyable=True)
    _fixup = base.UnicodeAttribute(
        "fixup",
        description=
        "A python expression the value of which will replace this column's"
        " value on database reads.  Write a ___ to access the original"
        ' value.  You can use macros for the embedding table.'
        ' This is for, e.g., simple URL generation'
        ' (fixup="\'\\internallink{/this/svc}\'+___").'
        ' It will *only* kick in when tuples are deserialized from the'
        " database, i.e., *not* for values taken from tables in memory.",
        default=None,
        copyable=True)
    _note = base.UnicodeAttribute(
        "note",
        description="Reference to a note meta"
        " on this table explaining more about this column",
        default=None,
        copyable=True)
    _xtype = base.UnicodeAttribute("xtype",
                                   description="VOTable xtype giving"
                                   " the serialization form",
                                   default=None,
                                   copyable=True)
    _stc = TableManagedAttribute(
        "stc",
        description="Internally used"
        " STC information for this column (do not assign to unless instructed"
        " to do so)",
        default=None,
        copyable=True)
    _stcUtype = TableManagedAttribute(
        "stcUtype",
        description="Internally used"
        " STC information for this column (do not assign to)",
        default=None,
        copyable=True)
    _properties = base.PropertyAttribute(copyable=True)
    _original = base.OriginalAttribute()

    # set to True in completeElement when parsing untrusted RDs.
    restrictedMode = False

    def __repr__(self):
        return "<Column %s>" % repr(self.name)

    def setMetaParent(self, parent):
        # columns should *not* take part in meta inheritance.  The reason is
        # that there are usually many columns to a table, and there's no
        # way I can see that any piece of metadata should be repeated in
        # all of them.  On the other hand, for votlinks (to name an example),
        # meta inheritance would have disastrous consequences.
        # So, we bend the rules a bit.
        raise base.StructureError(
            "Columns may not have meta parents.",
            hint="The rationale for this is explained in rscdef/column.py,"
            " look for setMetaParent.")

    def onParentComplete(self):
        # we need to resolve note on construction since columns are routinely
        # copied to other tables and  meta info does not necessarily follow.
        if isinstance(self.note, basestring):
            try:
                self.note = self.parent.getNote(self.note)
            except base.NotFoundError:  # non-existing notes silently ignored
                self.note = None

    def completeElement(self, ctx):
        """computes self.key from self.name, sanitizing delimited
        identifiers, and records restricted mode.
        """
        self.restrictedMode = getattr(ctx, "restricted", False)
        if isinstance(self.name, utils.QuotedName):
            self.key = self.name.name
            if ')' in self.key:
                # No '()' allowed in key for that breaks the %()s syntax (sigh!).
                # Work around with the following quick hack that would break
                # if people carefully chose proper names.  Anyone using delim.
                # ids in SQL deserves a good spanking anyway.
                self.key = self.key.replace(')', "__").replace('(', "__")
        else:
            self.key = self.name
        self._completeElementNext(ColumnBase, ctx)

    def isEnumerated(self):
        """returns true if this column has a values child with options."""
        return self.values and self.values.options

    def validate(self):
        self._validateNext(ColumnBase)
        # fixup is arbitrary python and hence forbidden in restricted mode
        if self.restrictedMode and self.fixup:
            raise base.RestrictedElement("fixup")

    def validateValue(self, value):
        """raises a ValidationError if value does not match the constraints
        given here.
        """
        if value is None:
            if self.required:
                raise base.ValidationError(
                    "Field %s is empty but non-optional" % self.name,
                    self.name)
            return

        # Only validate these if we're not a database column
        if not isinstance(self, Column):
            vals = self.values
            if vals:
                if vals.options:
                    # NOTE(review): falsy values (0, "") skip the options
                    # check here; presumably deliberate -- confirm.
                    if value and not vals.validateOptions(value):
                        raise base.ValidationError(
                            "Value %s not consistent with"
                            " legal values %s" % (value, vals.options),
                            self.name)
                else:
                    # fixed: compare against None explicitly; the previous
                    # truthiness test silently skipped a min/max bound of 0.
                    if vals.min is not None and value < vals.min:
                        raise base.ValidationError(
                            "%s too small (must be at least %s)" %
                            (value, vals.min), self.name)
                    if vals.max is not None and value > vals.max:
                        raise base.ValidationError(
                            "%s too large (must be less than %s)" %
                            (value, vals.max), self.name)

    def isIndexed(self):
        """returns a guess as to whether this column is part of an index.

        This may return True, False, or None (unknown).
        """
        if self.parent and hasattr(self.parent, "indexedColumns"):
            # parent is something like a TableDef
            if self.name in self.parent.indexedColumns:
                return True
            else:
                return False

    def isPrimary(self):
        """returns a guess as to whether this column is a primary key of the
        embedding table.

        This may return True, False, or None (unknown).
        """
        if self.parent and hasattr(self.parent, "primary"):
            # parent is something like a TableDef
            if self.name in self.parent.primary:
                return True
            else:
                return False

    # cleartext labels for the three isIndexed() outcomes.
    _indexedCleartext = {
        True: "indexed",
        False: "notIndexed",
        None: "unknown",
    }

    def asInfoDict(self):
        """returns a dictionary of certain, "user-interesting" properties
        of the data field, in a dict of strings.
        """
        return {
            "name": unicode(self.name),
            "description": self.description or "N/A",
            "tablehead": self.getLabel(),
            "unit": self.unit or "N/A",
            "ucd": self.ucd or "N/A",
            "verbLevel": self.verbLevel,
            "indexState": self._indexedCleartext[self.isIndexed()],
            "note": self.note,
        }

    def getDDL(self):
        """returns an SQL fragment describing this column ready for
        inclusion in a DDL statement.
        """
        type = self.type
        # we have one "artificial" type, and it shouldn't become more than
        # one; so, a simple hack should do it.
        if type.upper() == "UNICODE":
            type = "TEXT"

        # The "str" does magic for delimited identifiers, so it's important.
        items = [str(self.name), type]
        if self.required:
            items.append("NOT NULL")
        return " ".join(items)

    def getDisplayHintAsString(self):
        """returns the displayHint re-serialized to its literal form."""
        return self._displayHint.unparse(self.displayHint)

    def getLabel(self):
        """returns a short label for this column.

        The label is either the tablehead or, missing it, the capitalized
        column name.
        """
        if self.tablehead is not None:
            return self.tablehead
        return str(self.name).capitalize()

    def _getVOTableType(self):
        """returns the VOTable type, arraysize and xtype for this
        column-like thing.
        """
        type, arraysize, xtype = base.sqltypeToVOTable(self.type)

        if self.type == "date":
            xtype = "dachs:DATE"

        return type, arraysize, xtype
class FITSProdGrammar(Grammar):
    r"""A grammar that returns FITS-headers as dictionaries.

    This is the grammar you want when one FITS file corresponds to one
    row in the destination table.

    The keywords of the grammar record are the cards in the primary
    header (or some other hdu using the same-named attribute).  "-" in
    keywords is replaced with an underscore for easier @-referencing.
    You can use a mapKeys element to effect further name cosmetics.

    This grammar should handle compressed FITS images transparently if
    you set qnd="False".  This means that you will essentially get the
    headers from the second extension for those even if you left hdu="0".

    The original header is preserved as the value of the header\_ key.  This
    is mainly intended for WCS use, as in ``pywcs.WCS(@header_)``.

    If you have more complex structures in your FITS files, you can get access
    to the pyfits HDU using the hdusField attribute.  With
    ``hdusField="_H"``, you could say things like ``@_H[1].data[10][0]``
    to get the first data item in the tenth row in the second HDU.
    """
    name_ = "fitsProdGrammar"

    # "quick and dirty": fast primary-header read; must be False for
    # non-primary HDUs or compressed images.
    _qnd = base.BooleanAttribute(
        "qnd",
        default=True,
        description="Use a hack to read the FITS header more quickly.  This only"
        " works for the primary HDU",
        copyable=True)
    _hduIndex = base.IntAttribute(
        "hdu",
        default=0,
        description="Take the header from this HDU.  You must say qnd='False'"
        " for this to take effect.",
        copyable=True)
    # Optional renaming of header keys to rowdict keys.
    _mapKeys = base.StructAttribute(
        "mapKeys",
        childFactory=MapKeys,
        default=None,
        copyable=True,
        description="Prescription for how to"
        " map header keys to grammar dictionary keys")
    # When set, the full pyfits HDU list is exposed under this key.
    _hdusAttr = base.UnicodeAttribute(
        "hdusField",
        default=None,
        description="If set, the complete pyfits HDU list for the FITS"
        " file is returned in this grammar field.",
        copyable=True)
    # Guard against unterminated headers (missing END card).
    _maxHeaderBlocks = base.IntAttribute(
        "maxHeaderBlocks",
        default=40,
        copyable=True,
        description="Stop looking for"
        " FITS END cards and raise an error after this many blocks."
        " You may need to raise this for people dumping obscene amounts"
        " of data or history into headers.")

    rowIterator = FITSProdIterator

    def onElementComplete(self):
        # ensure mapKeys always exists so the iterator need not special-case
        # a missing mapping.
        if self.mapKeys is None:
            self.mapKeys = base.makeStruct(MapKeys)
        self._onElementCompleteNext(FITSProdGrammar)
class Execute(base.Structure, base.ExpansionDelegator):
    """a container for calling code.

    This is a cron-like functionality.  The jobs are run in separate
    threads, so they need to be thread-safe with respect to the
    rest of DaCHS.  DaCHS serializes calls, though, so that your
    code should never run twice at the same time.

    At least on CPython, you must make sure your code does not
    block with the GIL held; this is still in the server process.
    If you do daring things, fork off (note that you must not use
    any database connections you may have after forking, which means
    you can't safely use the RD passed in).  See the docs on `Element job`_.

    When testing/debugging such code, use ``gavo admin execute rd#id``
    to immediately run the jobs.
    """
    name_ = "execute"

    _title = base.UnicodeAttribute(
        "title",
        default=base.Undefined,
        description="Some descriptive title for the job; this is used"
        " in diagnostics.",
        copyable=False,
    )

    _at = base.StringListAttribute(
        "at",
        description="One or more hour:minute pairs at which to run"
        " the code each day.  This conflicts with every.  Optionally,"
        " you can prefix each time by one of m<dom> or w<dow> for"
        " jobs only to be exectued at some day of the month or week, both"
        " counted from 1.  So, 'm22 7:30, w3 15:02' would execute on"
        " the 22nd of each month at 7:30 UTC and on every wednesday at 15:02.",
        default=base.NotGiven,
        copyable=True,
    )

    _every = base.IntAttribute(
        "every",
        default=base.NotGiven,
        description="Run the job roughly every this many seconds."
        "  This conflicts with at.  Note that the first execution of"
        " such a job is after every/10 seconds, and that the timers"
        " start anew at every server restart.  So, if you restart"
        " often, these jobs may run much more frequently or not at all"
        " if the interval is large.  If every is smaller than zero, the"
        " job will be executed immediately when the RD is being loaded and is"
        " then run every abs(every) seconds",
        copyable=True,
    )

    _job = base.StructAttribute(
        "job",
        childFactory=CronJob,
        default=base.Undefined,
        description="The code to run.",
        copyable=True,
    )

    _debug = base.BooleanAttribute(
        "debug",
        description="If true, on execution of external processes (span or"
        " spawnPython), the output will be accumulated and mailed to"
        " the administrator.  Note that output of the actual cron job"
        " itself is not caught (it might turn up in serverStderr)."
        " You could use execDef.outputAccum.append(<stuff>) to have"
        " information from within the code included.",
        default=False)

    _properties = base.PropertyAttribute()

    _rd = common.RDAttribute()

    def spawn(self, cliList):
        """spawns an external command, capturing the output and mailing it
        to the admin if it failed.

        Output is buffered and mailed, so it shouldn't be  too large.

        This does not raise an exception if it failed (in normal usage,
        this would cause two mails to be sent).  Instead, it returns the
        returncode of the spawned process; if that's 0, you're ok.  But
        in general, you wouldn't want to check it.
        """
        p = subprocess.Popen(cliList,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             close_fds=True)
        childOutput, _ = p.communicate()
        if p.returncode:
            # Failure: always tell the administrator, regardless of debug.
            cron.sendMailToAdmin(
                "A process spawned by %s failed with %s" %
                (self.title, p.returncode),
                "Output of %s:\n\n%s" % (cliList, childOutput))

        elif self.debug:
            # Success with debug on: collect output for the debug mail.
            if childOutput:
                self.outputAccum.append("\n\n%s -> %s\n" %
                                        (cliList, p.returncode))
                self.outputAccum.append(childOutput)

        return p.returncode

    def spawnPython(self, pythonFile):
        """spawns a new python interpreter executing pythonFile.

        pythonFile may be resdir-relative.

        Returns the child's returncode (see spawn); 0 means success.
        """
        # Fixed: pass spawn's returncode through instead of silently
        # discarding it (spawn's whole error contract is that returncode).
        return self.spawn(
            ["python", os.path.join(self.rd.resdir, pythonFile)])

    def _parseAt(self, atSpec, ctx):
        """returns a tuple ready for cron.repeatAt from atSpec.

        see the at StringListAttribute for how it would look like; this
        parses one element of that string list.

        The result is a (day-of-month, day-of-week, hour, minute)
        tuple; the first two items may be None.
        """
        # Optional w<dow> and/or m<dom> prefixes, then mandatory hour:minute.
        mat = re.match(
            r"(?P<dow>w\d\s+)?"
            r"(?P<dom>m\d\d?\s+)?"
            r"(?P<hr>\d+):(?P<min>\d+)", atSpec)
        if not mat:
            raise base.LiteralParseError("at", atSpec, pos=ctx.pos, hint=
             "This is hour:minute optionally prefixed by either w<weekday> or"\
             " m<day of month>, each counted from 1.")

        hour, minute = int(mat.group("hr")), int(mat.group("min"))
        if not (0 <= hour <= 23 and 0 <= minute <= 59):
            # Fixed hint: the check requires both bounds, so the message
            # must say "and", not "or".
            raise base.LiteralParseError(
                "at",
                atSpec,
                pos=ctx.pos,
                hint=
                "This must be hour:minute with 0<=hour<=23 and 0<=minute<=59")

        dom = None
        if mat.group("dom"):
            # strip the leading "m"; 28 is the cap so the job fires in
            # every month (see hint below).
            dom = int(mat.group("dom")[1:])
            if not 1 <= dom <= 28:
                raise base.LiteralParseError(
                    "at",
                    atSpec,
                    pos=ctx.pos,
                    hint="day-of-month in at must be between 1 and 28.")

        dow = None
        if mat.group("dow"):
            # strip the leading "w"; weekdays are counted 1..7.
            dow = int(mat.group("dow")[1:])
            if not 1 <= dow <= 7:
                raise base.LiteralParseError(
                    "at",
                    atSpec,
                    pos=ctx.pos,
                    hint="day-of-week in at must be between 1 and 7.")

        return (dom, dow, hour, minute)

    def completeElement(self, ctx):
        self._completeElementNext(Execute, ctx)
        # at and every are mutually exclusive, and one must be given:
        # exactly one of the two may remain NotGiven.
        if len([s for s in [self.at, self.every] if s is base.NotGiven]) != 1:
            raise base.StructureError(
                "Exactly one of at and every required"
                " for Execute",
                pos=ctx.pos)

        if self.at is not base.NotGiven:
            # Parse all at literals now so bad specs fail at RD load time.
            self.parsedAt = []
            for literal in self.at:
                self.parsedAt.append(self._parseAt(literal, ctx))

    def onElementComplete(self):
        self._onElementCompleteNext(Execute)

        # Job names must be unique per RD; combine RD id and job title.
        self.jobName = "%s#%s" % (self.rd.sourceId, self.title)

        self.callable = _guardedFunctionFactory.makeGuardedThreaded(
            self.job.compile(), self)

        if self.at is not base.NotGiven:
            cron.repeatAt(self.parsedAt, self.jobName, self.callable)
        else:
            cron.runEvery(self.every, self.jobName, self.callable)
# Beispiel #11  (extraction/dataset artifact -- not part of the original module;
# 0             commented out because the bare name would raise NameError)
class DBCore(TableBasedCore):
	"""A core performing database queries on one table or view.

	DBCores ask the service for the desired output schema and adapt their
	output.  The DBCore's output table, on the other hand, lists all fields 
	available from the queried table.
	"""
	name_ = "dbCore"

	_sortKey = base.UnicodeAttribute("sortKey",
		description="A pre-defined sort order (suppresses DB options widget)."
		"  The sort key accepts multiple columns, separated by commas.",
		copyable=True)
	_limit = base.IntAttribute("limit", description="A pre-defined"
		" match limit (suppresses DB options widget).", copyable=True)
	_distinct = base.BooleanAttribute("distinct", description="Add a"
		" 'distinct' modifier to the query?", default=False, copyable=True)
	_groupBy = base.UnicodeAttribute("groupBy", description=
		"A group by clause.  You shouldn't generally need this, and if"
		" you use it, you must give an outputTable to your core.",
		default=None)

	def wantsTableWidget(self):
		return self.sortKey is None and self.limit is None

	def getQueryCols(self, service, queryMeta):
		"""returns the fields we need in the output table.

		The normal DbBased core just returns whatever the service wants.
		Derived cores, e.g., for special protocols, could override this
		to make sure they have some fields in the result they depend on.
		"""
		return service.getCurOutputFields(queryMeta)

	def _runQuery(self, resultTableDef, fragment, pars, queryMeta,
			**kwargs):
		with base.getTableConn()  as conn:
			queriedTable = rsc.TableForDef(self.queriedTable, nometa=True,
				create=False, connection=conn)
			queriedTable.setTimeout(queryMeta["timeout"])

			if fragment and pars:
				resultTableDef.addMeta("info", repr(pars),
					infoName="queryPars", infoValue=fragment)

			iqArgs = {"limits": queryMeta.asSQL(), "distinct": self.distinct,
				"groupBy": self.groupBy}
			iqArgs.update(kwargs)

			try:
				try:
					return self._makeTable(
						queriedTable.iterQuery(resultTableDef, fragment, pars,
							**iqArgs), resultTableDef, queryMeta)
				except:
					mapDBErrors(*sys.exc_info())
			finally:
				queriedTable.close()

	def _makeResultTableDef(self, service, inputTable, queryMeta):
		"""returns an OutputTableDef object for querying our table with queryMeta.
		"""
		return base.makeStruct(outputdef.OutputTableDef,
			parent_=self.queriedTable.parent, id="result",
			onDisk=False, columns=self.getQueryCols(service, queryMeta),
			params=self.queriedTable.params)

	def run(self, service, inputTable, queryMeta):
		"""does the DB query and returns an InMemoryTable containing
		the result.
		"""
		resultTableDef = self._makeResultTableDef(
			service, inputTable, queryMeta)

		resultTableDef.copyMetaFrom(self.queriedTable)
		if not resultTableDef.columns:
			raise base.ValidationError("No output columns with these settings."
				"_OUTPUT")

		sortKeys = None
		if self.sortKey:
			sortKeys = self.sortKey.split(",")

		queryMeta.overrideDbOptions(limit=self.limit, sortKeys=sortKeys,
			sortFallback=self.getProperty("defaultSortKey", None))
		try:
			fragment, pars = self._getSQLWhere(inputTable, queryMeta)
		except base.LiteralParseError, ex:
			raise base.ui.logOldExc(base.ValidationError(str(ex),
				colName=ex.attName))
		queryMeta["sqlQueryPars"] = pars
		return self._runQuery(resultTableDef, fragment, pars, queryMeta)
# Beispiel #12  (extraction/dataset artifact -- not part of the original module;
# 0             commented out because the bare name would raise NameError)
class OutputTableDef(rscdef.TableDef):
    """A table that has outputFields for columns.
	"""
    name_ = "outputTable"

    # Don't validate meta for these -- while they are children
    # of validated structures (services), they don't need any
    # meta at all.  This should go as soon as we have a sane
    # inheritance hierarchy for tables.
    metaModel = None

    _cols = rscdef.ColumnListAttribute(
        "columns",
        childFactory=OutputField,
        description="Output fields for this table.",
        aliases=["column"],
        copyable=True)

    _verbLevel = base.IntAttribute(
        "verbLevel",
        default=None,
        description="Copy over columns from fromTable not"
        " more verbose than this.")

    _autocols = base.StringListAttribute(
        "autoCols",
        description="Column names obtained from fromTable; you can use"
        " shell patterns into the output table's parent table (in a table"
        " core, that's the queried table; in a service, it's the core's"
        " output table) here.")

    def __init__(self, parent, **kwargs):
        rscdef.TableDef.__init__(self, parent, **kwargs)
        # Figure out the table we take column definitions from; try the
        # likely containers in order and fall back to an empty table.
        self.parentTable = None
        try:
            # am I in a table-based core?
            self.parentTable = self.parent.queriedTable
        except (AttributeError, base.StructureError):
            # no.
            pass

        if not self.parentTable:
            try:
                # am I in a service with a core with output table?
                self.parentTable = self.parent.core.outputTable
            except (AttributeError, base.StructureError):
                # no.
                pass

        if not self.parentTable:
            # no suitable column source, use an empty table:
            self.parentTable = _EMPTY_TABLE

        self.namePath = None

    def _adoptColumn(self, sourceColumn):
        # Do not overwrite existing fields here to let the user
        # override individually
        try:
            self.getColumnByName(sourceColumn.name)
        except base.NotFoundError:
            self.feedObject("outputField",
                            OutputField.fromColumn(sourceColumn))

    def _addNames(self, ctx, names):
        # since autoCols is not copyable, we can require
        # that _addNames only be called when there's a real parse context.
        if ctx is None:
            raise base.StructureError("outputTable autocols is"
                                      " only available with a parse context")
        for name in names:
            self._addName(ctx, name)

    def _addName(self, ctx, name):
        """adopts a param or column name into the outputTable.

		name may be a reference or a param or column name in the parent
		table (as determined in the constructor, i.e., the queried table
		of a core or the output table of a service's core.

		You can also use shell patterns into parent columns.
		"""
        if utils.identifierPattern.match(name):
            # plain identifier: resolve it as a reference
            refOb = ctx.resolveId(name, self)
            if refOb.name_ == "param":
                self.feedObject("param", refOb.copy(self))
            else:
                self._adoptColumn(refOb)

        else:
            # it's a shell pattern into parent table
            for col in self.parentTable:
                if fnmatch.fnmatch(col.name, name):
                    self._adoptColumn(col)

    def completeElement(self, ctx):
        if self.autoCols:
            self._addNames(ctx, self.autoCols)

        if self.verbLevel:
            # pull in all parent columns/params up to the requested
            # verbosity (existing fields are not overwritten, see
            # _adoptColumn)
            table = self.parentTable
            for col in table.columns:
                if col.verbLevel <= self.verbLevel:
                    self._adoptColumn(col)
            for par in table.params:
                if par.verbLevel <= self.verbLevel:
                    self.feedObject("param", par.copy(self))

        self._completeElementNext(OutputTableDef, ctx)

    @classmethod
    def fromColumns(cls, columns, **kwargs):
        # Fixed: kwargs were accepted but silently dropped; pass them
        # on to the parent constructor.
        return rscdef.TableDef.fromColumns(
            [OutputField.fromColumn(c) for c in columns], **kwargs)

    @classmethod
    def fromTableDef(cls, tableDef, ctx):
        # Alternate constructor building an output table mirroring an
        # existing table definition.
        return cls(None,
                   columns=[OutputField.fromColumn(c) for c in tableDef],
                   forceUnique=tableDef.forceUnique,
                   dupePolicy=tableDef.dupePolicy,
                   primary=tableDef.primary,
                   params=tableDef.params).finishElement(ctx)