def __call__(self, tag, *children, **attrib):
    """Build and return a new element named *tag*.

    Keyword arguments supply the element's initial attributes.  Each
    positional child is then folded into the element, in order,
    according to its type:

    * a dict is merged into the element's attributes;
    * a string is appended to the element's ``text`` or, once the
      element has at least one sub-element, to the ``tail`` of the last
      sub-element;
    * an element (anything accepted by ``ET.iselement``) is appended as
      a sub-element.

    :param tag: name of the element to create.
    :param children: dicts, strings and/or elements, processed in order.
    :param attrib: initial attributes, given as keyword arguments.
    :return: the newly created element.
    :raise TypeError: if a child is not a dict, a string or an element.
    """
    # (str, unicode) on Python 2 and (str, str) on Python 3.  This avoids
    # the Python-2-only name ``basestring``, which raises NameError on
    # Python 3 for every child that is not a dict.
    text_types = (str, type(u""))

    elem = ET.Element(tag, attrib)
    for item in children:
        if isinstance(item, dict):
            elem.attrib.update(item)
        elif isinstance(item, text_types):
            if len(elem):
                # Text following a sub-element lives in that sub-element's
                # tail, not in the parent's text.
                last = elem[-1]
                last.tail = (last.tail or "") + item
            else:
                elem.text = (elem.text or "") + item
        elif ET.iselement(item):
            elem.append(item)
        else:
            # Wrap in a 1-tuple so that a tuple argument is reported
            # verbatim instead of being unpacked by the % operator.
            raise TypeError("bad argument: %r" % (item,))
    return elem
def concat(docs):
    """
    Concatenate together the contents of multiple documents from a
    single corpus, using an appropriate concatenation function.  This
    utility function is used by corpus readers when the user requests
    more than one document at a time.

    The strategy is chosen from the documents' types:

    * a single document is returned unchanged;
    * strings (including string subclasses) are joined into one string;
    * corpus views are wrapped in a ``ConcatenatedCorpusView``;
    * lists / tuples sharing one class are flattened into a new plain
      list / tuple;
    * XML elements sharing one class are appended (not copied) to a new
      ``documents`` element.

    :param docs: a sequence of documents.
    :return: the combined document.
    :raise ValueError: if ``docs`` is empty, or if no concatenation
        method is known for the documents' types.
    """
    if len(docs) == 1:
        return docs[0]
    if len(docs) == 0:
        raise ValueError('concat() expects at least one object!')

    types = set(d.__class__ for d in docs)

    # If they're all strings, use string concatenation.
    # (str, unicode) on Python 2 and (str, str) on Python 3; this avoids
    # the Python-2-only names ``unicode`` and ``basestring``.  join() is
    # linear, unlike repeated ``+``.
    text_types = (str, type(u""))
    if all(isinstance(d, text_types) for d in docs):
        return ''.join(docs)

    # If they're all corpus views, then use ConcatenatedCorpusView.
    if all(issubclass(typ, AbstractCorpusView) for typ in types):
        return ConcatenatedCorpusView(docs)

    # Otherwise, see what we can do:
    if len(types) == 1:
        typ = list(types)[0]

        if issubclass(typ, list):
            # Always build a fresh plain list, so that no input document
            # is mutated or aliased by the result.
            merged = []
            for doc in docs:
                merged.extend(doc)
            return merged

        if issubclass(typ, tuple):
            return tuple(item for doc in docs for item in doc)

        # NOTE(review): iselement() is handed the class, not an instance;
        # this relies on the element class itself exposing the attribute
        # that iselement() looks for -- confirm for the ElementTree
        # implementation in use.
        if ElementTree.iselement(typ):
            xmltree = ElementTree.Element('documents')
            for doc in docs:
                xmltree.append(doc)
            return xmltree

    # No method found!
    raise ValueError("Don't know how to concatenate types: %r" % (types,))