Ejemplo n.º 1
0
 def to_xml(self, value, param_name):
     """Wrap every entry of ``value`` in an ``item`` child of a new element.

     :param value: Iterable of values to place under the wrapper.
     :param param_name: Tag name of the wrapping element.
     :return: The wrapping element.
     """
     root = Element(param_name)
     for entry in value:
         # Append an empty <item> first, then assign the value to the
         # last ``item`` child of the wrapper.
         root.append(Element('item'))
         root.item[-1] = entry
     return root
Ejemplo n.º 2
0
 def add_document(self,
                  url,
                  title,
                  categories,
                  published_time,
                  content,
                  author=None,
                  topics=None,
                  links=None,
                  terms=None,
                  document_id=None):
     """Build a ``document`` element and append it to ``self.corpus``.

     A URL that is already listed in ``self.url_indices`` is logged and
     skipped without touching the corpus.

     :param url: URL of the document; must not be ``None`` or empty.
     :param title: Title, normalised through ``Corpus.unicodify``.
     :param categories: Value stored under ``categories.category`` if truthy.
     :param published_time: Value stored as ``published_time``.
     :param content: Iterable of paragraphs; falsy paragraphs are dropped.
     :param author: Value stored as ``author``.
     :param topics: Value stored under ``topics.topic`` if truthy.
     :param links: Value stored under ``links.link`` if truthy.
     :param terms: Mapping of term -> iterable of ``(begin, end)`` pairs.
     :param document_id: Explicit id; when ``None`` or empty the last six
         hex digits of the MD5 of the UTF-8 encoded title are used.
     :raises KeyError: If ``url`` is ``None`` or empty.
     """
     if url is None or len(url) == 0:
         raise KeyError("'url' is mandatory")
     if url in self.url_indices:
         log.info(f"Ignoring duplicate URL={url}")
         return

     doc = Element("document")
     title = Corpus.unicodify(title)
     if document_id is None or len(document_id) == 0:
         doc.document_id = md5(title.encode("utf-8")).hexdigest()[-6:]
     else:
         doc.document_id = document_id
     doc.url = url
     doc.title = title
     doc.author = author
     doc.published_time = published_time

     # List-valued fields: the container element is always created, its
     # children only when there is something to store.
     doc.categories = Element("categories")
     if categories:
         doc.categories.category = categories
     doc.topics = Element("topics")
     if topics:
         doc.topics.topic = topics
     doc.links = Element("links")
     if links:
         doc.links.link = links
     doc.content = Element("content")
     if content:
         paragraphs = []
         for paragraph in content:
             if paragraph:
                 paragraphs.append(Corpus.unicodify(paragraph))
         doc.content.p = paragraphs

     # Terms: one <term> per key, each carrying its list of locations.
     doc.terms = Element("terms")
     if terms:
         term_nodes = []
         for word in terms:
             term_node = Element("term")
             term_node.word = word
             term_node.locations = Element("locations")
             location_nodes = []
             for span in terms[word]:
                 location_node = Element("location")
                 location_node.begin, location_node.end = span
                 location_nodes.append(location_node)
             term_node.locations.location = location_nodes
             term_nodes.append(term_node)
         doc.terms.term = term_nodes

     self.corpus.append(doc)
     self.url_indices.append(url)
Ejemplo n.º 3
0
    def to_xml(self, value, param_name):
        """Wrap a sequence of dicts in a new element named ``param_name``.

        Each dict is converted with ``self.get_xml_dict`` into a child
        element tagged ``dict``.

        :param value: Iterable of dicts.
        :param param_name: Tag name of the wrapping element.
        :return: The wrapping element.
        """
        root = Element(param_name)
        for mapping in value:
            dict_node = self.get_xml_dict(mapping, 'dict')
            root.append(dict_node)
        return root
Ejemplo n.º 4
0
    def get_xml_dict(self, _dict, name):
        """Convert a dict into an element with one ``item`` child per entry.

        Every ``item`` gets a ``key`` and a ``value`` child holding the
        corresponding entry of ``_dict``.

        :param _dict: Mapping to convert.
        :param name: Tag name of the resulting element.
        :return: The element built from ``_dict``.
        """
        root = Element(name)
        for entry_key, entry_value in _dict.items():
            entry = Element('item')

            # Attach empty key/value children, then fill in their content.
            entry.key = Element('key')
            entry.value = Element('value')
            entry.key[-1] = entry_key
            entry.value[-1] = entry_value

            root.append(entry)
        return root
Ejemplo n.º 5
0
def copyNode(node, children=False, parent=False):
    """ Copy an XML Node

    The copy keeps the tag and attributes of ``node`` and is created with
    the TEI namespace as its default namespace.

    :param node: Etree Node
    :param children: Copy the text and children nodes if set to True
    :param parent: Append copied node to parent if given
    :return: New Element
    """
    tei_nsmap = {None: "http://www.tei-c.org/ns/1.0"}
    if parent is False:
        element = Element(node.tag, attrib=node.attrib, nsmap=tei_nsmap)
    else:
        element = SubElement(parent,
                             node.tag,
                             attrib=node.attrib,
                             nsmap=tei_nsmap)

    if not children:
        return element

    if node.text:
        element._setText(node.text)
    for descendant in xmliter(node):
        element.append(copy(descendant))
    return element
Ejemplo n.º 6
0
 def __init__(self, xml_input=None, annotations=None):
     """Create an empty corpus and optionally load it from ``xml_input``.

     :param xml_input: Optional path. A file is loaded with
         ``read_from_xml``; any other existing path is loaded with
         ``read_from_folder``.
     :param annotations: Optional object whose ``documents`` attribute is
         stored as ``self.annotations``.
     :raises FileNotFoundError: If ``xml_input`` is given but does not exist.
     """
     super().__init__("corpus", "document")
     self.corpus = Element("corpus")
     self.url_indices = []
     self.has_terms_locations = False

     processors = {
         "tokenize": "gum",
         "ner": "default",
         "lemma": "gum",
         "pos": "gum",
         "depparse": "gum"
     }
     self.nlp = stanza.Pipeline("en",
                                processors=processors,
                                verbose=False,
                                tokenize_no_ssplit=True)

     if annotations:
         self.annotations = annotations.documents
     else:
         self.annotations = None

     # Nothing to load when no input path was supplied.
     if not xml_input:
         return
     if not os.path.exists(xml_input):
         raise FileNotFoundError(
             f"{xml_input} not found. Check the path again.")
     if os.path.isfile(xml_input):
         self.read_from_xml(xml_input)
     else:
         self.read_from_folder(xml_input)
Ejemplo n.º 7
0
    def getvalue(self, serialize=True):
        """ Returns the payload's value, by default serialized to a string
        of either XML or JSON.

        With ``serialize`` set to False the top-level object (an element
        for XML, a dict otherwise) is returned without being serialized.
        """
        # Start from an empty container matching the output format.
        if self.zato_is_xml:
            value = Element('item_list' if self.zato_output_repeated else 'item')
        else:
            value = [] if self.zato_output_repeated else {}

        if self.zato_output_repeated:
            output = self.zato_output
        else:
            # Single item: collect the known attributes set on this object.
            attr_names = set(dir(self)) & self.zato_all_attrs
            output = [{attr: getattr(self, attr) for attr in attr_names}]

        if output:

            # The first element's type is taken as representative of them all
            is_sa_namedtuple = isinstance(output[0], KeyedTuple)

            for row in output:
                out_item = Element('item') if self.zato_is_xml else {}

                for is_required, name in chain(self.zato_required,
                                               self.zato_optional):
                    leave_as_is = isinstance(name, AsIs)
                    elem_value = self._getvalue(name, row, is_sa_namedtuple,
                                                is_required, leave_as_is)

                    if isinstance(name, ForceType):
                        name = name.name

                    # Byte strings are decoded as UTF-8; unicode is kept
                    if isinstance(elem_value, basestring) and not isinstance(
                            elem_value, unicode):
                        elem_value = elem_value.decode('utf-8')

                    if self.zato_is_xml:
                        setattr(out_item, name, elem_value)
                    else:
                        out_item[name] = elem_value

                if self.zato_output_repeated:
                    value.append(out_item)
                else:
                    value = out_item

        if self.zato_is_xml:
            em = ElementMaker(annotate=False,
                              namespace=self.namespace,
                              nsmap={None: self.namespace})
            zato_env = em.zato_env(em.cid(self.zato_cid), em.result(ZATO_OK))
            top = getattr(em, self.response_elem)(zato_env)
            top.append(value)
        else:
            top = {self.response_elem: value}
            search = self.zato_meta.get('search')
            if search:
                top['_meta'] = search

        if not serialize:
            return top

        if self.zato_is_xml:
            deannotate(top, cleanup_namespaces=True)
            return etree.tostring(top)
        return dumps(top)
Ejemplo n.º 8
0
    def convert(self,
                param,
                param_name,
                value,
                has_simple_io_config,
                is_xml,
                date_time_format=None):
        """ Converts a single SimpleIO parameter's value according to the
        parameter's declared type or its name.

        :param param: The parameter's declaration; its type (Boolean, CSV,
            List, Integer, Unicode, UTC) selects the conversion.
        :param param_name: The parameter's name, matched against the
            configured boolean prefixes, integer names and integer suffixes.
        :param value: The value to convert.
        :param has_simple_io_config: Enables the name-based integer
            conversion for parameters of no specific type.
        :param is_xml: Whether List values are read from / written as XML.
        :param date_time_format: If given, datetime values are formatted
            with it.
        :return: The converted value. List parameters return early: a list
            of child texts when parsing XML, a wrapper element when producing
            XML, or the value itself for JSON.
        :raises ZatoException: On any error; the message, including the
            traceback, is logged first.
        """
        try:
            if any(
                    param_name.startswith(prefix)
                    for prefix in self.bool_parameter_prefixes) or isinstance(
                        param, Boolean):
                value = asbool(
                    value or None
                )  # value can be an empty string and asbool chokes on that

            # Falsy values (None, '', 0, False, empty containers) skip the
            # type-specific conversions below.
            if value:
                if isinstance(param, Boolean):
                    value = asbool(value)

                elif isinstance(param, CSV):
                    value = value.split(',')

                elif isinstance(param, List):
                    if is_xml:
                        # We are parsing XML to create a SIO request
                        if isinstance(value, EtreeElement):
                            return [elem.text for elem in value.getchildren()]

                        # We are producing XML out of an SIO response
                        else:
                            wrapper = Element(param_name)
                            for item_value in value:
                                xml_item = Element('item')
                                wrapper.append(xml_item)
                                wrapper.item[-1] = item_value
                            return wrapper

                    # This is a JSON list
                    return value

                elif isinstance(param, Integer):
                    value = int(value)

                elif isinstance(param, Unicode):
                    value = unicode(value)

                elif isinstance(param, UTC):
                    value = value.replace('+00:00', '')

                else:
                    if value and value != ZATO_NONE and has_simple_io_config:
                        if any(param_name == elem for elem in self.int_parameters) or \
                           any(param_name.endswith(suffix) for suffix in self.int_parameter_suffixes):
                            value = int(value)

                if date_time_format and isinstance(value, datetime):
                    value = value.strftime(date_time_format)

            if isinstance(param, CSV) and not value:
                value = []

            return value
        except Exception as e:
            # format_exc takes an optional traceback depth limit, not the
            # exception, and already reports the exception being handled.
            msg = 'Conversion error, param:[{}], param_name:[{}], repr(value):[{}], e:[{}]'.format(
                param, param_name, repr(value), format_exc())
            logger.error(msg)

            raise ZatoException(msg=msg)
def new(tag, **extra):
    """Create an element tagged ``qn(tag)``, forwarding ``extra`` as keyword arguments."""
    qualified_tag = qn(tag)
    return Element(qualified_tag, **extra)