Ejemplo n.º 1
0
 def __call__(self, tag, *children, **attrib):
     elem = ET.Element(tag, attrib)
     for item in children:
         if isinstance(item, dict):
             elem.attrib.update(item)
         elif isinstance(item, basestring):
             if len(elem):
                 elem[-1].tail = (elem[-1].tail or "") + item
             else:
                 elem.text = (elem.text or "") + item
         elif ET.iselement(item):
             elem.append(item)
         else:
             raise TypeError("bad argument: %r" % item)
     return elem
Ejemplo n.º 2
0
 def __call__(self, tag, *children, **attrib):
     elem = ET.Element(tag, attrib)
     for item in children:
         if isinstance(item, dict):
             elem.attrib.update(item)
         elif isinstance(item, basestring):
             if len(elem):
                 elem[-1].tail = (elem[-1].tail or "") + item
             else:
                 elem.text = (elem.text or "") + item
         elif ET.iselement(item):
             elem.append(item)
         else:
             raise TypeError("bad argument: %r" % item)
     return elem
Ejemplo n.º 3
0
def concat(docs):
    """
    Concatenate together the contents of multiple documents from a
    single corpus, using an appropriate concatenation function.  This
    utility function is used by corpus readers when the user requests
    more than one document at a time.

    ``docs`` must be a non-empty sequence supporting ``len()``,
    indexing and iteration.  If it holds exactly one document, that
    document itself is returned.  Otherwise the result depends on the
    classes of the documents:

    - all string classes: a single joined string;
    - all subclasses of ``AbstractCorpusView``: a
      ``ConcatenatedCorpusView`` over ``docs``;
    - one shared class derived from ``list`` (or ``tuple``): a new
      list (or tuple) holding the items of every document in order;
    - one shared class of ElementTree elements: a new ``documents``
      element with each document appended as a child.

    Raises ValueError if ``docs`` is empty, or if none of the cases
    above applies.
    """
    if len(docs) == 1:
        return docs[0]
    if len(docs) == 0:
        raise ValueError('concat() expects at least one object!')

    types = set([d.__class__ for d in docs])

    # If they're all strings, use string concatenation.  join() builds
    # the result in one pass, whereas folding with "+" re-copies the
    # accumulated text once per document.
    if types.issubset([str, unicode, basestring]):
        return ''.join(docs)

    # If they're all corpus views, then use ConcatenatedCorpusView.
    for typ in types:
        if not issubclass(typ, AbstractCorpusView):
            break
    else:
        return ConcatenatedCorpusView(docs)

    # Otherwise, see what we can do:
    if len(types) == 1:
        typ = list(types)[0]

        # Lists and tuples are gathered with a single extend() pass
        # instead of repeated "+", which is quadratic in total length.
        if issubclass(typ, list):
            merged = []
            for doc in docs:
                merged.extend(doc)
            return merged

        if issubclass(typ, tuple):
            merged = []
            for doc in docs:
                merged.extend(doc)
            return tuple(merged)

        # iselement() is meant to inspect an object, not a class; asking
        # it about the class only works when that class happens to
        # expose a class-level ``tag``.  Every document shares the same
        # class at this point, so the first one is representative.
        if ElementTree.iselement(docs[0]):
            xmltree = ElementTree.Element('documents')
            for doc in docs:
                xmltree.append(doc)
            return xmltree

    # No method found!
    raise ValueError("Don't know how to concatenate types: %r" % types)
Ejemplo n.º 4
0
def concat(docs):
    """
    Concatenate together the contents of multiple documents from a
    single corpus, using an appropriate concatenation function.  This
    utility function is used by corpus readers when the user requests
    more than one document at a time.

    ``docs`` must be a non-empty sequence supporting ``len()``,
    indexing and iteration.  If it holds exactly one document, that
    document itself is returned.  Otherwise the result depends on the
    classes of the documents:

    - all string classes: a single joined string;
    - all subclasses of ``AbstractCorpusView``: a
      ``ConcatenatedCorpusView`` over ``docs``;
    - one shared class derived from ``list`` (or ``tuple``): a new
      list (or tuple) holding the items of every document in order;
    - one shared class of ElementTree elements: a new ``documents``
      element with each document appended as a child.

    Raises ValueError if ``docs`` is empty, or if none of the cases
    above applies.
    """
    if len(docs) == 1:
        return docs[0]
    if len(docs) == 0:
        raise ValueError('concat() expects at least one object!')

    types = set([d.__class__ for d in docs])

    # If they're all strings, use string concatenation.  join() builds
    # the result in one pass, whereas folding with "+" re-copies the
    # accumulated text once per document.
    if types.issubset([str, unicode, basestring]):
        return ''.join(docs)

    # If they're all corpus views, then use ConcatenatedCorpusView.
    for typ in types:
        if not issubclass(typ, AbstractCorpusView):
            break
    else:
        return ConcatenatedCorpusView(docs)

    # Otherwise, see what we can do:
    if len(types) == 1:
        typ = list(types)[0]

        # Lists and tuples are gathered with a single extend() pass
        # instead of repeated "+", which is quadratic in total length.
        if issubclass(typ, list):
            merged = []
            for doc in docs:
                merged.extend(doc)
            return merged

        if issubclass(typ, tuple):
            merged = []
            for doc in docs:
                merged.extend(doc)
            return tuple(merged)

        # iselement() is meant to inspect an object, not a class; asking
        # it about the class only works when that class happens to
        # expose a class-level ``tag``.  Every document shares the same
        # class at this point, so the first one is representative.
        if ElementTree.iselement(docs[0]):
            xmltree = ElementTree.Element('documents')
            for doc in docs:
                xmltree.append(doc)
            return xmltree

    # No method found!
    raise ValueError("Don't know how to concatenate types: %r" % types)