class ColToRowGrammar(grammars.Grammar):
    """is a grammar that selects some columns and returns each of them as
    a row with a specified key.

    This is useful to extract all products from tables that can have
    multiple columns carrying products. The input is a sequence of
    dictionaries (i.e., Table rows).
    """
    # The iterator class that actually performs the column-to-row
    # transposition; instantiated by the grammars.Grammar machinery.
    rowIterator = ColToRowIterator

    # Name of the key under which each selected column value is emitted
    # in the generated rows (no default: must be given in the RD).
    _targetKey = base.UnicodeAttribute("targetKey", default=base.Undefined,
        description="Name of the target column")
    # The input columns to transpose; each listed column yields one
    # output row per input row.
    _sourceKeys = base.ListOfAtomsAttribute("sourceKeys",
        description="Names of the source columns.",
        itemAttD=base.UnicodeAttribute("sourceKey"))
class SourceSpec(base.Structure):
    """A Specification of a data descriptor's inputs.

    This will typically be files taken from a file system. If so, DaCHS
    will, in each directory, process the files in alphabetical order. No
    guarantees are made as to the sequence directories are processed in.

    Multiple patterns are processed in the order given in the RD.
    """
    name_ = "sources"

    _patterns = base.ListOfAtomsAttribute("patterns", description=
        "Paths to the source files. You can use shell patterns here.",
        itemAttD=base.UnicodeAttribute("pattern", description="Shell pattern"
            " for source file(s), relative to resource directory."),
        copyable=True)
    _items = base.ListOfAtomsAttribute("items", description=
        "String literals to pass to grammars. In contrast to patterns,"
        " they are not interpreted as file names but passed to the"
        " grammar verbatim. Normal grammars do not like this. It is"
        " mainly intended for use with custom or null grammars.",
        itemAttD=base.UnicodeAttribute("item",
            description="Grammar-specific string"), copyable=True)
    _recurse = base.BooleanAttribute("recurse", default=False,
        description="Search for pattern(s) recursively in their directory"
        " part(s)?", copyable=True)
    _ignore = base.StructAttribute("ignoredSources", childFactory=
        IgnoreSpec, description="Specification of sources that should not"
        " be processed although they match patterns. Typically used"
        " in update-type data descriptors.", copyable=True)
    # fixed: was copyable="True" (a string); every sibling attribute
    # passes the bool, and truthiness is unchanged.
    _file = base.DataContent(description="A single"
        " file name (this is for convenience)", copyable=True)
    _original = base.OriginalAttribute()

    def __iter__(self):
        return self.iterSources()

    def completeElement(self, ctx):
        # make sure there always is an (possibly empty) ignoredSources
        # struct so iterSources need not special-case its absence.
        if self.ignoredSources is base.Undefined:
            self.ignoredSources = base.makeStruct(IgnoreSpec)
        self._completeElementNext(SourceSpec, ctx)

    def _expandDirParts(self, dirParts, ignoreDotDirs=True):
        """expands a list of directories into a list of them and all their
        descendants.

        It follows symbolic links but doesn't do any bookkeeping, so bad
        things will happen if the directory graph contains cycles.
        """
        res = []
        for dirPart in dirParts:
            for root, dirs, files in os.walk(dirPart):
                if ignoreDotDirs:
                    if os.path.basename(root).startswith("."):
                        continue
                    # Prune in place so os.walk does not descend into
                    # dot-directories (rebinding dirs would leave the
                    # traversal untouched and let their descendants
                    # slip through).
                    dirs[:] = [d for d in dirs if not d.startswith(".")]
                subDirs = (os.path.join(root, d) for d in dirs)
                res.extend(d for d in subDirs if os.path.isdir(d))
                for child in files:
                    destName = os.path.join(root, child)
                    # a symlink that is not a regular file points to a
                    # directory: recurse into it, too.  Fixed: the
                    # original passed the bare string, which made the
                    # recursive call iterate over its characters.
                    if os.path.islink(destName
                            ) and not os.path.isfile(destName):
                        res.extend(self._expandDirParts([destName]))
        return res

    def iterSources(self, connection=None):
        """iterates over the paths and items this source spec currently
        matches, skipping anything in ignoredSources.

        connection is passed on to ignoredSources.prepare (for fromdb
        ignore specs).
        """
        self.ignoredSources.prepare(connection)
        # literal items first, in RD order
        for item in self.items:
            if not self.ignoredSources.isIgnored(item):
                yield item

        baseDir = ""
        if self.parent.rd:
            baseDir = self.parent.rd.resdir

        for pattern in self.patterns:
            dirPart, baseName = os.path.split(pattern)
            if self.parent.rd:
                dirParts = [os.path.join(baseDir, dirPart)]
            else:
                dirParts = [dirPart]
            if self.recurse:
                dirParts = dirParts+self._expandDirParts(dirParts)
            # sort for reproducible processing order within a pattern
            for curDir in sorted(dirParts):
                for name in sorted(glob.glob(
                        os.path.join(curDir, baseName))):
                    fullName = os.path.abspath(name)
                    if not self.ignoredSources.isIgnored(fullName):
                        yield fullName

        if self.content_:
            yield os.path.abspath(os.path.join(baseDir, self.content_))

    def __nonzero__(self):
        return (not not self.patterns) or (not not self.items
            ) or (not not self.content_)

    # python 3 compatibility alias for the truth protocol
    __bool__ = __nonzero__
class IgnoreSpec(base.Structure):
    """A specification of sources to ignore.

    Sources mentioned here are compared against the inputsDir-relative
    path of sources generated by sources (cf. `Element sources`_).
    If there is a match, the corresponding source will not be processed.

    You can get ignored files from various sources. If you give more
    than one source, the set of ignored files is the union of the
    individual sets.
    """
    name_ = "ignoreSources"

    _fromdb = base.UnicodeAttribute("fromdb", default=None,
        description="A DB query to obtain a set of sources to ignore; the"
        " select clause must select exactly one column containing the"
        " source key.")
    _fromfile = common.ResdirRelativeAttribute("fromfile", default=None,
        description="A name of a file containing blacklisted source"
        " paths, one per line. Empty lines and lines beginning with a hash"
        " are ignored.")
    _patterns = base.ListOfAtomsAttribute("patterns", description=
        "Shell patterns to ignore. Slashes are treated like any other"
        " character, i.e., patterns do not know about paths.",
        itemAttD=base.UnicodeAttribute("pattern", description="Shell pattern"
            " for source file(s), relative to resource directory."),
        copyable=True)
    _rd = common.RDAttribute()

    def prepare(self, connection):
        """sets attributes to speed up isIgnored()
        """
        self.inputsDir = base.getConfig("inputsDir")
        self.ignoredSet = set()
        if self.fromdb and connection is not None:
            try:
                with connection.savepoint():
                    self.ignoredSet |= set(r[0]
                        for r in connection.query(self.fromdb))
            except base.DBError:
                # table probably doesn't exist yet; this is a best-effort
                # feature, so just report and carry on.
                base.ui.notifyError(
                    "ignore fromdb failed (probably no table yet)")
        if self.fromfile:
            # fixed: use a context manager so the blacklist file handle
            # is closed deterministically rather than leaked.
            with open(self.fromfile) as f:
                for ln in f:
                    ln = ln.strip()
                    if ln and not ln.startswith("#"):
                        self.ignoredSet.add(ln)

    def isIgnored(self, path):
        """returns true if path, made inputsdir-relative, should be ignored.
        """
        try:
            path = utils.getRelativePath(path, self.inputsDir,
                liberalChars=True)
        except ValueError:
            # not in inputs; fall back to matching against the full path.
            pass
        if path in self.ignoredSet:
            return True
        for pat in self.patterns:
            if fnmatch.fnmatch(path, pat):
                return True
        return False
class DataDescriptor(base.Structure, base.ComputedMetaMixin,
        common.IVOMetaMixin, tabledef.PublishableDataMixin):
    """A description of how to process data from a given set of sources.

    Data descriptors bring together a grammar, a source specification and
    "makes", each giving a table and a rowmaker to feed the table from the
    grammar output.

    They are the "executable" parts of a resource descriptor. Their ids
    are used as arguments to gavoimp for partial imports.
    """
    name_ = "data"
    resType = "data"

    # Embedded definitions; before="makes" makes the framework parse them
    # ahead of the makes that reference them.
    _rowmakers = base.StructListAttribute("rowmakers",
        childFactory=rmkdef.RowmakerDef,
        description="Embedded build rules (usually rowmakers are defined"
        " toplevel)", copyable=True, before="makes")
    _tables = base.StructListAttribute("tables",
        childFactory=tabledef.TableDef,
        description="Embedded table definitions (usually, tables are defined"
        " toplevel)", copyable=True, before="makes")
    # One of the registered grammar elements; the registry maps element
    # names to grammar classes.
    _grammar = base.MultiStructAttribute("grammar", default=None,
        childFactory=builtingrammars.getGrammar,
        childNames=builtingrammars.GRAMMAR_REGISTRY.keys(),
        description="Grammar used to parse this data set.", copyable=True,
        before="makes")
    _sources = base.StructAttribute("sources", default=None,
        childFactory=SourceSpec,
        description="Specification of sources that should be fed to the"
        " grammar.", copyable=True, before="grammar")
    _dependents = base.ListOfAtomsAttribute("dependents",
        itemAttD=base.UnicodeAttribute("recreateAfter"),
        description="A data ID to recreate when this resource is"
        " remade; use # syntax to reference in other RDs.")
    _auto = base.BooleanAttribute("auto", default=True,
        description="Import this data set if not explicitly"
        " mentioned on the command line?")
    _updating = base.BooleanAttribute("updating", default=False,
        description="Keep existing tables on import? You usually want this"
        " False unless you have some kind of sources management,"
        " e.g., via a sources ignore specification.", copyable=True)
    _makes = base.StructListAttribute("makes", childFactory=Make,
        copyable=True,
        description="Specification of a target table and the rowmaker"
        " to feed them.")
    _params = common.ColumnListAttribute("params",
        childFactory=column.Param,
        description='Param ("global columns") for this data (mostly for'
        ' VOTable serialization).', copyable=True)
    _properties = base.PropertyAttribute()
    _rd = common.RDAttribute()
    _original = base.OriginalAttribute()

    # cardinalities of meta keys expected on published data
    # (consumed by the registry machinery).
    metaModel = ("title(1), creationDate(1), description(1),"
        "subject, referenceURL(1)")

    def __repr__(self):
        return "<data descriptor with id %s>"%self.id

    def validate(self):
        self._validateNext(DataDescriptor)
        # registration comes from PublishableDataMixin; published data
        # must be addressable, hence needs an id.
        if self.registration and self.id is None:
            raise base.StructureError("Published data needs an assigned id.")

    def onElementComplete(self):
        self._onElementCompleteNext(DataDescriptor)
        for t in self.tables:
            t.setMetaParent(self)
        if self.registration:
            self.registration.register()

    # since we want to be able to create DDs dynamically, they must find
    # their meta parent themselves.  We do this while the DD is being
    # adopted; the rules here are: if the parent is a meta mixin itself,
    # it's the meta parent, if it has an rd attribute, use that, else
    # give up.
    # TODO: For DDs on cores, it would be *desirable* to come up
    # with some magic that makes the current service their meta parent.
    def _getParent(self):
        # name-mangled to _DataDescriptor__parent; deliberately shadows
        # the parent attribute the framework would otherwise manage.
        return self.__parent

    def _setParent(self, value):
        self.__parent = value
        if isinstance(value, base.MetaMixin):
            self.setMetaParent(value)
        elif hasattr(value, "rd"):
            self.setMetaParent(value.rd)

    parent = property(_getParent, _setParent)

    def iterSources(self, connection=None):
        # delegate to the sources struct; a DD without sources yields
        # nothing rather than failing.
        if self.sources:
            return self.sources.iterSources(connection)
        else:
            return iter([])

    def __iter__(self):
        # iterating a DD iterates the tables it builds
        # (same as iterTableDefs).
        for m in self.makes:
            yield m.table

    def iterTableDefs(self):
        """iterates over the definitions of all the tables built by this DD.
        """
        for m in self.makes:
            yield m.table

    def getTableDefById(self, id):
        """returns the definition of the table with id, raising
        StructureError if this DD builds no such table.
        """
        for td in self.iterTableDefs():
            if td.id==id:
                return td
        raise base.StructureError("No table name %s will be built"%id)

    def getTableDefWithRole(self, role):
        """returns the table definition of the make carrying role,
        raising StructureError if there is none.
        """
        for m in self.makes:
            if m.role==role:
                return m.table
        raise base.StructureError("No table def with role '%s'"%role)

    def getPrimary(self):
        """returns the "primary" table definition in the data descriptor.

        "primary" means the only table in a one-table dd, the table with
        the role "primary" if there are more. If no matching table is
        found, a StructureError is raised.
        """
        if len(self.makes)==1:
            return self.makes[0].table
        else:
            try:
                return self.getTableDefWithRole("primary")
            except base.StructureError: # raise more telling message
                pass
        raise base.StructureError("Ambiguous request for primary table")

    def copyShallowly(self):
        """returns a shallow copy of self.

        Sources are not copied.
        """
        return DataDescriptor(self.parent, rowmakers=self.rowmakers[:],
            tables=self.tables[:], grammar=self.grammar,
            makes=self.makes[:])

    def getURL(self, rendName, absolute=True):
        # there's no sensible URL for DDs; thus, let people browse
        # the RD info.  At least they should find links to any tables
        # included here there.  (rendName is ignored for the same reason.)
        basePath = "%sbrowse/%s"%(
            base.getConfig("web", "nevowRoot"),
            self.rd.sourceId)
        if absolute:
            return base.getConfig("web", "serverURL")+basePath
        return basePath
class ProcSetup(base.Structure):
    """Prescriptions for setting up a namespace for a procedure application.

    You can add names to this namespace you using par(ameter)s.
    If a parameter has no default and an procedure application does
    not provide them, an error is raised.

    You can also add names by providing a code attribute containing
    a python function body in code. Within, the parameters are
    available. The procedure application's parent can be accessed
    as parent. All names you define in the code are available as
    globals to the procedure body.

    Caution: Macros are expanded within the code; this means you need
    double backslashes if you want a single backslash in python code.
    """
    name_ = "setup"

    _code = base.ListOfAtomsAttribute("codeFrags",
        description="Python function bodies setting globals for the function"
        " application. Macros are expanded in the context"
        " of the procedure's parent.",
        itemAttD=base.UnicodeAttribute("code", description="Python function"
            " bodies setting globals for the function application. Macros"
            " are expanded in the context of the procedure's parent.",
            copyable=True),
        copyable=True)
    _pars = base.StructListAttribute("pars", ProcPar,
        description="Names to add to the procedure's global namespace.",
        copyable=True)
    _original = base.OriginalAttribute()

    def _getParSettingCode(self, useLate, indent, bindings):
        """returns code that sets our parameters.

        If useLate is true, generate for late bindings. Indent the
        code by indent. Bindings is is a dictionary overriding the
        defaults or setting parameter values.
        """
        assignments = []
        for par in self.pars:
            # only emit parameters matching the requested binding time
            if par.late!=useLate:
                continue
            boundValue = bindings.get(par.key, base.NotGiven)
            if boundValue is base.NotGiven:
                # nothing bound: fall back to the par's default literal
                boundValue = par.content_
            assignments.append("%s%s = %s"%(indent, par.key, boundValue))
        return "\n".join(assignments)

    def getParCode(self, bindings):
        """returns code doing setup bindings un-indented.
        """
        return self._getParSettingCode(False, "", bindings)

    def getLateCode(self, bindings):
        """returns code doing late (in-function) bindings indented with
        two spaces.
        """
        return self._getParSettingCode(True, "  ", bindings)

    def getBodyCode(self):
        """returns the body code un-indented.
        """
        # re-indent each fragment to column 0 and join them into
        # one code block
        return "\n".join(
            utils.fixIndentation(frag, "", governingLine=1)
            for frag in self.codeFrags)