Exemple #1
0
class ColToRowGrammar(grammars.Grammar):
	"""is a grammar that selects some columns and returns each of them
	as a row with a specified key.

	This is useful to extract all products from tables that can have
	multiple columns carrying products.

	The input is a sequence of dictionaries (i.e., Table rows).
	"""

	rowIterator = ColToRowIterator

	_targetKey = base.UnicodeAttribute("targetKey", default=base.Undefined,
		description="Name of the target column")
	_sourceKeys = base.ListOfAtomsAttribute("sourceKeys",
		description="Names of the source columns.", 
		itemAttD=base.UnicodeAttribute("sourceKey"))
Exemple #2
0
class SourceSpec(base.Structure):
	"""A Specification of a data descriptor's inputs.

	This will typcially be files taken from a file system.  If so, DaCHS will,
	in each directory, process the files in alphabetical order.  No guarantees
	are made as to the sequence directories are processed in.

	Multiple patterns are processed in the order given in the RD.
	"""
	name_ = "sources"

	_patterns = base.ListOfAtomsAttribute("patterns", description=
		"Paths to the source files.  You can use shell patterns here.",
		itemAttD=base.UnicodeAttribute("pattern", description="Shell pattern"
			" for source file(s), relative to resource directory."),
		copyable=True)
	_items = base.ListOfAtomsAttribute("items", description=
		"String literals to pass to grammars.  In contrast to patterns,"
		" they are not interpreted as file names but passed to the"
		" grammar verbatim.  Normal grammars do not like this. It is"
		" mainly intended for use with custom or null grammars.",
		itemAttD=base.UnicodeAttribute("item", 
			description="Grammar-specific string"), copyable=True)
	_recurse = base.BooleanAttribute("recurse", default=False,
		description="Search for pattern(s) recursively in their directory"
			" part(s)?", copyable=True)
	_ignore = base.StructAttribute("ignoredSources", childFactory=
		IgnoreSpec, description="Specification of sources that should not"
			" be processed although they match patterns.  Typically used"
			" in update-type data descriptors.", copyable=True)
	_file = base.DataContent(description="A single"
		" file name (this is for convenience)", copyable="True")
	_original = base.OriginalAttribute()

	def __iter__(self):
		return self.iterSources()

	def completeElement(self, ctx):
		if self.ignoredSources is base.Undefined:
			self.ignoredSources = base.makeStruct(IgnoreSpec)
		self._completeElementNext(SourceSpec, ctx)

	def _expandDirParts(self, dirParts, ignoreDotDirs=True):
		"""expands a list of directories into a list of them and all their
		descendants.

		It follows symbolic links but doesn't do any bookkeeping, so bad
		things will happen if the directory graph contains cycles.
		"""
		res = []
		for root in dirParts:
			for root, dirs, files in os.walk(root):
				if ignoreDotDirs:
					if os.path.basename(root).startswith("."):
						continue
					dirs = [dir for dir in dirs if not dir.startswith(".")]
				dirs = (os.path.join(root, dir) for dir in dirs)
				res.extend(dir for dir in dirs if os.path.isdir(dir))
				for child in files:
					destName = os.path.join(root, child)
					if os.path.islink(destName) and not os.path.isfile(destName):
						res.extend(self._expandDirParts(destName))
		return res

	def iterSources(self, connection=None):
		self.ignoredSources.prepare(connection)
		for item in self.items:
			if not self.ignoredSources.isIgnored(item):
				yield item

		baseDir = ""
		if self.parent.rd:
			baseDir = self.parent.rd.resdir

		for pattern in self.patterns:
			dirPart, baseName = os.path.split(pattern)
			if self.parent.rd:
				dirParts = [os.path.join(baseDir, dirPart)]
			else:
				dirParts = [dirPart]
			if self.recurse:
				dirParts = dirParts+self._expandDirParts(dirParts)
			for dir in sorted(dirParts):
				for name in sorted(glob.glob(os.path.join(dir, baseName))):
					fullName = os.path.abspath(name)
					if not self.ignoredSources.isIgnored(fullName):
						yield fullName
		if self.content_:
			yield os.path.abspath(os.path.join(baseDir, self.content_))
	
	def __nonzero__(self):
		return (not not self.patterns) or (not not self.items
			) or (not not self.content_)
Exemple #3
0
class IgnoreSpec(base.Structure):
	"""A specification of sources to ignore.

	Sources mentioned here are compared against the inputsDir-relative path
	of sources generated by sources (cf. `Element sources`_).  If there is
	a match, the corresponding source will not be processed.

	You can get ignored files from various sources.  If you give more
	than one source, the set of ignored files is the union of the the 
	individual sets.
	"""
	name_ = "ignoreSources"

	_fromdb = base.UnicodeAttribute("fromdb", default=None,
		description="A DB query to obtain a set of sources to ignore; the"
			" select clause must select exactly one column containing the"
			" source key.")
	_fromfile = common.ResdirRelativeAttribute("fromfile", default=None,
		description="A name of a file containing blacklisted source"
			" paths, one per line.  Empty lines and lines beginning with a hash"
			" are ignored.")
	_patterns = base.ListOfAtomsAttribute("patterns", description=
		"Shell patterns to ignore.  Slashes are treated like any other"
		" character, i.e., patterns do not know about paths.",
		itemAttD=base.UnicodeAttribute("pattern", description="Shell pattern"
			" for source file(s), relative to resource directory."),
		copyable=True)
	_rd = common.RDAttribute()

	def prepare(self, connection):
		"""sets attributes to speed up isIgnored()
		"""
		self.inputsDir = base.getConfig("inputsDir")
		self.ignoredSet = set()

		if self.fromdb and connection is not None:
			try:
				with connection.savepoint():
					self.ignoredSet |= set(r[0] 
						for r in connection.query(self.fromdb))
			except base.DBError: # table probably doesn't exist yet.
				base.ui.notifyError("ignore fromdb failed (probably no table yet)")

		if self.fromfile:
			for ln in open(self.fromfile):
				ln = ln.strip()
				if ln and not ln.startswith("#"):
					self.ignoredSet.add(ln)

	def isIgnored(self, path):
		"""returns true if path, made inputsdir-relative, should be ignored.
		"""
		try:
			path = utils.getRelativePath(path, self.inputsDir, liberalChars=True)
		except ValueError: # not in inputs, use full path.
			pass
		if path in self.ignoredSet:
			return True
		for pat in self.patterns:
			if fnmatch.fnmatch(path, pat):
				return True
		return False
Exemple #4
0
class DataDescriptor(base.Structure, base.ComputedMetaMixin, 
		common.IVOMetaMixin, tabledef.PublishableDataMixin):
	"""A description of how to process data from a given set of sources.

	Data descriptors bring together a grammar, a source specification and
	"makes", each giving a table and a rowmaker to feed the table from the
	grammar output.

	They are the "executable" parts of a resource descriptor.  Their ids
	are used as arguments to gavoimp for partial imports.
	"""
	name_ = "data"
	resType = "data"

	_rowmakers = base.StructListAttribute("rowmakers",
		childFactory=rmkdef.RowmakerDef, 
		description="Embedded build rules (usually rowmakers are defined toplevel)",
		copyable=True,
		before="makes")

	_tables = base.StructListAttribute("tables",
		childFactory=tabledef.TableDef, 
		description="Embedded table definitions (usually, tables are defined"
			" toplevel)", 
		copyable=True,
		before="makes")

	_grammar = base.MultiStructAttribute("grammar", 
		default=None,
		childFactory=builtingrammars.getGrammar,
		childNames=builtingrammars.GRAMMAR_REGISTRY.keys(),
		description="Grammar used to parse this data set.", 
		copyable=True,
		before="makes")
	
	_sources = base.StructAttribute("sources", 
		default=None, 
		childFactory=SourceSpec,
		description="Specification of sources that should be fed to the grammar.",
		copyable=True,
		before="grammar")

	_dependents = base.ListOfAtomsAttribute("dependents",
		itemAttD=base.UnicodeAttribute("recreateAfter"),
		description="A data ID to recreate when this resource is"
			" remade; use # syntax to reference in other RDs.")

	_auto = base.BooleanAttribute("auto", 
		default=True, 
		description="Import this data set if not explicitly"
			" mentioned on the command line?")

	_updating = base.BooleanAttribute("updating", 
		default=False,
		description="Keep existing tables on import?  You usually want this"
			" False unless you have some kind of sources management,"
			" e.g., via a sources ignore specification.", 
		copyable=True)

	_makes = base.StructListAttribute("makes", 
		childFactory=Make,
		copyable=True, 
		description="Specification of a target table and the rowmaker"
			" to feed them.")
	
	_params = common.ColumnListAttribute("params",
		childFactory=column.Param, 
		description='Param ("global columns") for this data (mostly for'
		 ' VOTable serialization).', 
		copyable=True)

	_properties = base.PropertyAttribute()

	_rd = common.RDAttribute()

	_original = base.OriginalAttribute()

	metaModel = ("title(1), creationDate(1), description(1),"
		"subject, referenceURL(1)")

	def __repr__(self):
		return "<data descriptor with id %s>"%self.id

	def validate(self):
		self._validateNext(DataDescriptor)
		if self.registration and self.id is None:
			raise base.StructureError("Published data needs an assigned id.")

	def onElementComplete(self):
		self._onElementCompleteNext(DataDescriptor)
		for t in self.tables:
			t.setMetaParent(self)
		if self.registration:
			self.registration.register()

	# since we want to be able to create DDs dynamically , they must find their
	# meta parent themselves.  We do this while the DD is being adopted;
	# the rules here are: if the parent is a meta mixin itself, it's the
	# meta parent, if it has an rd attribute, use that, else give up.
	# TODO: For DDs on cores, it would be *desirable* to come up
	# with some magic that makes the current service their meta parent.

	def _getParent(self):
		return self.__parent
	
	def _setParent(self, value):
		self.__parent = value
		if isinstance(value, base.MetaMixin):
			self.setMetaParent(value)
		elif hasattr(value, "rd"):
			self.setMetaParent(value.rd)
	
	parent = property(_getParent, _setParent)

	def iterSources(self, connection=None):
		if self.sources:
			return self.sources.iterSources(connection)
		else:
			return iter([])

	def __iter__(self):
		for m in self.makes:
			yield m.table

	def iterTableDefs(self):
		"""iterates over the definitions of all the tables built by this DD.
		"""
		for m in self.makes:
			yield m.table

	def getTableDefById(self, id):
		for td in self.iterTableDefs():
			if td.id==id:
				return td
		raise base.StructureError("No table name %s will be built"%id)

	def getTableDefWithRole(self, role):
		for m in self.makes:
			if m.role==role:
				return m.table
		raise base.StructureError("No table def with role '%s'"%role)

	def getPrimary(self):
		"""returns the "primary" table definition in the data descriptor.

		"primary" means the only table in a one-table dd, the table with the
		role "primary" if there are more.  If no matching table is found, a
		StructureError is raised.
		"""
		if len(self.makes)==1:
			return self.makes[0].table
		else:
			try:
				return self.getTableDefWithRole("primary")
			except base.StructureError: # raise more telling message
				pass
		raise base.StructureError("Ambiguous request for primary table")

	def copyShallowly(self):
		"""returns a shallow copy of self.

		Sources are not copied.
		"""
		return DataDescriptor(self.parent, rowmakers=self.rowmakers[:],
			tables=self.tables[:], grammar=self.grammar, makes=self.makes[:])
	
	def getURL(self, rendName, absolute=True):
		# there's no sensible URL for DDs; thus, let people browse
		# the RD info.  At least they should find links to any tables
		# included here there.
		basePath = "%sbrowse/%s"%(
			base.getConfig("web", "nevowRoot"),
			self.rd.sourceId)
		if absolute:
			return base.getConfig("web", "serverURL")+basePath
		return basePath
class ProcSetup(base.Structure):
	"""Prescriptions for setting up a namespace for a procedure application.

	You can add names to this namespace you using par(ameter)s.
	If a parameter has no default and an procedure application does
	not provide them, an error is raised.

	You can also add names by providing a code attribute containing
	a python function body in code.  Within, the parameters are
	available.  The procedure application's parent can be accessed
	as parent.  All names you define in the code are available as
	globals to the procedure body.

	Caution: Macros are expanded within the code; this means you
	need double backslashes if you want a single backslash in python
	code.
	"""
	name_ = "setup"

	_code = base.ListOfAtomsAttribute("codeFrags",
		description="Python function bodies setting globals for the function"
		" application.  Macros are expanded in the context"
		" of the procedure's parent.", 
		itemAttD=base.UnicodeAttribute("code", description="Python function"
			" bodies setting globals for the function application.  Macros"
			" are expanded in the context of the procedure's parent.",
			copyable=True),
		copyable=True)
	_pars = base.StructListAttribute("pars", ProcPar,
		description="Names to add to the procedure's global namespace.", 
		copyable=True)
	_original = base.OriginalAttribute()

	def _getParSettingCode(self, useLate, indent, bindings):
		"""returns code that sets our parameters.

		If useLate is true, generate for late bindings.  Indent the
		code by indent.  Bindings is is a dictionary overriding
		the defaults or setting parameter values.
		"""
		parCode = []
		for p in self.pars:
			if p.late==useLate:
				val = bindings.get(p.key, base.NotGiven)
				if val is base.NotGiven:
					val = p.content_
				parCode.append("%s%s = %s"%(indent, p.key, val))
		return "\n".join(parCode)

	def getParCode(self, bindings):
		"""returns code doing setup bindings un-indented.
		"""
		return self._getParSettingCode(False, "", bindings)

	def getLateCode(self, bindings):
		"""returns code doing late (in-function) bindings indented with two
		spaces.
		"""
		return self._getParSettingCode(True, "  ", bindings)

	def getBodyCode(self):
		"""returns the body code un-indented.
		"""
		collectedCode = []
		for frag in self.codeFrags:
			collectedCode.append(
				utils.fixIndentation(frag, "", governingLine=1))
		return "\n".join(collectedCode)