Ejemplo n.º 1
1
    def source_fragment(self):
        """Return the body XML, or the fragment of it selected by ``xpath``.

        The XPath expression is read from the ``xpath`` request parameter.
        When it is missing or equal to ``'/'`` the encoded body is returned
        unchanged.  Expressions that start with ``/cnx:`` are evaluated with
        the ``cnx`` prefix bound to the CNXML namespace; any other expression
        is evaluated without namespace bindings.

        Returns:
            The serialized fragment after namespace cleanup, with the default
            CNXML ``xmlns`` declaration removed from the text.
        """
        value = self.context.body
        xpath = self.request.get('xpath', None)
        if xpath is None or xpath == '/':
            return value.raw_encoded

        namespaces = {}
        if xpath.startswith('/cnx:'):
            namespaces = {'cnx': 'http://cnx.rice.edu/cnxml'}

        tree = etree.fromstring(value.raw_encoded)
        # Serialize every matching node and concatenate them in one pass.
        # NOTE(review): the re-parse below presumably expects the matches to
        # form a single-rooted document -- confirm against callers.
        result = ''.join(
            etree.tostring(node)
            for node in tree.xpath(xpath, namespaces=namespaces))

        # if we don't strip namespaces from the fragment, eip loses the
        # plot completely
        result = etree.fromstring(result)
        for prefix, ns in result.nsmap.items():
            if prefix is None:
                prefix = 'nil'
            etree.strip_attributes(result, '{%s}%s' % (ns, prefix))
        etree.cleanup_namespaces(result)
        result = etree.tostring(result)
        result = result.replace('xmlns="http://cnx.rice.edu/cnxml" ', '')

        return result
Ejemplo n.º 2
0
def main():
    """Rewrite the city of a ResolveIP response read from stdin.

    The first line of stdin is parsed as a JSON payload.  When the payload
    has a ``response.body`` entry, that body is parsed as XML, the ``City``
    of the ``ResolveIPResult`` element is set to ``"New York"`` and the
    re-serialized document is stored back in the payload.  The (possibly
    modified) payload is always printed as JSON on stdout.

    A failure while transforming the body is logged and otherwise ignored,
    so the payload is still emitted.
    """
    data = sys.stdin.readlines()
    payload = data[0]
    payload_dict = json.loads(payload)

    if "response" in payload_dict and "body" in payload_dict["response"]:
        body = payload_dict["response"]["body"]
        try:
            root = objectify.fromstring(str(body))
            ns = "{http://ws.cdyne.com/}"
            logging.debug("transforming")

            ipe = ns + "ResolveIPResponse"
            ipt = ns + "ResolveIPResult"

            root.Body[ipe][ipt].City = "New York"

            # Drop the objectify type annotations added by the assignment.
            objectify.deannotate(root.Body[ipe][ipt].City)
            etree.cleanup_namespaces(root.Body[ipe][ipt].City)

            payload_dict["response"]["body"] = etree.tostring(root)

            logging.debug(etree.tostring(root))

        except Exception:
            # Best effort: keep emitting the payload, but record why the
            # transformation did not happen instead of hiding the error.
            logging.exception("could not transform response body")

    print(json.dumps(payload_dict))
Ejemplo n.º 3
0
 def _set_content(self, xml_node, html_node):
     """Store ``html_node`` in ``xml_node`` according to ``self.content_type``.

     * ``MIXEDCONTENT``: the HTML node is appended as a child element.
     * ``CDATA``: namespaces are dropped from the HTML tree, which is then
       serialized and stored as a CDATA section in ``xml_node.text``.
     * ``PLAINTEXT``: only the text content of the HTML tree is stored.

     Raises:
         PylatestDocumentError: if the content type is none of the above.
     """
     if self.content_type == self.MIXEDCONTENT:
         xml_node.append(html_node)
         return

     if self.content_type == self.CDATA:
         # HACK: drop all namespaces
         # based on https://stackoverflow.com/questions/30232031/
         namespaced = html_node.xpath(
             "descendant-or-self::*[namespace-uri()!='']")
         for namespaced_element in namespaced:
             local_name = etree.QName(namespaced_element).localname
             namespaced_element.tag = local_name
         etree.cleanup_namespaces(html_node)
         # Serialize the (now namespace-free) tree and wrap it in CDATA.
         serialized = etree.tostring(
             html_node,
             xml_declaration=False,
             encoding='utf-8',
             pretty_print=False)
         xml_node.text = etree.CDATA(serialized.decode('utf-8'))
         return

     if self.content_type == self.PLAINTEXT:
         plain = etree.tostring(html_node, method="text")
         xml_node.text = plain.decode('utf-8')
         return

     raise PylatestDocumentError(
         "unknown content type '{}'".format(self.content_type))
Ejemplo n.º 4
0
    def export(
        self, directory: Union[Path, str], filename: Optional[str] = None
    ) -> None:
        """Post-process the partial label and write it to disk.

        The label goes through the deferred-fill, pruning, population and
        structure checks, has its unused namespace declarations removed, and
        is then written as pretty-printed UTF-8 XML with an XML declaration.

        When `filename` is omitted, the last colon-separated part of the
        label's logical identifier (LID) is used, with an ``.xml`` suffix.

        Args:
            directory: Output directory; created (with parents) if missing.
            filename: Optional file name override for the output label.
        """
        post_processing_steps = (
            self._eval_deferred_fills,
            self._prune_empty_optionals,
            self._ensure_populated,
            self._check_structure,
        )
        for step in post_processing_steps:
            step()
        etree.cleanup_namespaces(self.label)

        if filename is None:
            lid_nodes = self.label.xpath(ATTR_PATHS["lid"], namespaces=self.nsmap)
            lid = lid_nodes[0].text
            product_id = lid.split(':')[-1].strip()  # ExoMars/PSA specific
            filename = f"{product_id}.xml"

        target_dir = directory if isinstance(directory, Path) else Path(directory)
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / filename
        self.label.write(
            str(target_path),
            encoding="UTF-8",
            pretty_print=True,
            xml_declaration=True,
        )
Ejemplo n.º 5
0
def publish():
	"""Handle the publish form and append a device to the device list XML.

	On a valid submission a ``Device`` element is built from the form
	fields (plus a random ``value`` between 0 and 100), appended to the
	root of the device list document, and the document is written back to
	the same file.  The client is then redirected to the index view.
	Otherwise the publish template is rendered with the form.
	"""
	form = PublishForm()
	if form.validate_on_submit():
		deviceName = str(form.name.data)
		deviceId = str(form.did.data)
		location = str(form.location.data)
		deviceType = str(form.dtype.data)

		deviceNode = objectify.Element("Device")
		deviceNode.name = deviceName
		deviceNode.id = deviceId
		deviceNode.location = location
		deviceNode.type = deviceType
		deviceNode.value = random.uniform(0,100)

		# Raw string: the backslashes are path separators, not escapes.
		deviceListPath = r".\generated\DeviceList-a.xml"
		doc = etree.parse(deviceListPath)
		xmlRoot = doc.getroot()

		xmlRoot.append(deviceNode)
		# Remove the objectify type annotations before serializing.
		objectify.deannotate(xmlRoot)
		etree.cleanup_namespaces(xmlRoot)

		xmlstr = etree.tostring(xmlRoot, pretty_print=True, xml_declaration=True)
		xmlstr = xmlstr.decode("utf-8")
		# The context manager closes the file even if the write fails.
		with open(deviceListPath, 'w') as xmlfp:
			xmlfp.write(xmlstr)
		return redirect(url_for('index'))
	return render_template('MAWSPublish.html', form=form)
Ejemplo n.º 6
0
    def save(self, fileName=None):
        '''Save this POM to a file.

        The document is first written to ``<fileName>.tmp``.  If the target
        file already exists it is renamed to ``<fileName>.bak``, and the
        temporary file is then renamed to the target name.

        :param fileName: Target path; defaults to ``self.pomFile``.
        '''
        if not fileName:
            fileName = self.pomFile

        tmp = '%s.tmp' % fileName
        bak = '%s.bak' % fileName

        # dirname() is '' for a bare file name, and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        parentDir = os.path.dirname(tmp)
        if parentDir and not os.path.exists(parentDir):
            os.makedirs(parentDir)

        if os.path.exists(tmp):
            os.remove(tmp)

        objectify.deannotate(self.xml)
        etree.cleanup_namespaces(self.xml)

        # Drop an empty <dependencies> element before writing.
        deps = getattr(self.project, 'dependencies', None)
        if deps is not None and len(deps) == 0:
            self.project.dependencies.remove()

        self.xml.write(tmp, encoding="UTF-8", pretty_print=True)

        if os.path.exists(fileName):
            os.rename(fileName, bak)

        os.rename(tmp, fileName)
Ejemplo n.º 7
0
    def egress(self, envelope, http_headers, operation, binding_options):
        """Add the WS-Addressing headers to the outgoing envelope.

        The action comes from the operation's abstract input message and
        falls back to the SOAP action.  ``Action``, ``MessageID`` (a fresh
        ``urn:uuid:``) and ``To`` elements are appended to the SOAP header,
        after which the header's namespace declarations are cleaned up.

        Returns:
            The ``(envelope, http_headers)`` pair.
        """
        wsa_action = operation.input.abstract.wsa_action or operation.soapaction

        header = get_or_create_header(envelope)
        header.extend([
            WSA.Action(wsa_action),
            WSA.MessageID('urn:uuid:' + str(uuid.uuid4())),
            WSA.To(binding_options['address']),
        ])

        cleanup_options = {'keep_ns_prefixes': header.nsmap}
        # the top_nsmap kwarg was added in lxml 3.5.0
        if etree.LXML_VERSION[:2] >= (3, 5):
            cleanup_options['top_nsmap'] = self.nsmap
        etree.cleanup_namespaces(header, **cleanup_options)
        return envelope, http_headers
Ejemplo n.º 8
0
def get_title(pmcid):
    """Return the article title found in the XML fetched for ``pmcid``.

    The XML is obtained through ``get_xml``.  If the root tag is
    namespace-qualified, namespaces are stripped from all element tags.
    ``tex-math`` elements are removed and unwanted elements are replaced by
    their captions before the title is looked up in the ``front`` elements.

    Returns:
        The text of the first ``article-title`` found, with its text
        fragments joined by single spaces, or ``None`` when no XML is
        available or no title element is present.
    """
    xml_string = get_xml(pmcid)
    if not xml_string:
        return None
    tree = etree.fromstring(xml_string.encode('utf-8'))
    # Remove namespaces if any exist
    if tree.tag.startswith('{'):
        # iter() is the supported replacement for the deprecated
        # getiterator().
        for element in tree.iter():
            # The following code will throw a ValueError for some
            # exceptional tags such as comments and processing instructions.
            # It's safe to just leave these tag names unchanged.
            try:
                element.tag = etree.QName(element).localname
            except ValueError:
                continue
        etree.cleanup_namespaces(tree)
    # Strip out latex
    _remove_elements_by_tag(tree, 'tex-math')
    # Strip out all content in unwanted elements except the captions
    _replace_unwanted_elements_with_their_captions(tree)
    # First process front element. Titles alt-titles and abstracts
    # are pulled from here.
    front_elements = _select_from_top_level(tree, 'front')
    title_xpath = './article-meta/title-group/article-title'
    for front_element in front_elements:
        for element in front_element.xpath(title_xpath):
            return ' '.join(element.itertext())
    return None
Ejemplo n.º 9
0
def adaptNewServerXML(newName="s4", extraServers=0):
    """Rewrite the s4 server definition and insert it into the pc2 scenario.

    Works on the fixed paths ``/mnt/tmp/pc2/s4.xml`` and
    ``/mnt/tmp/pc2/pc2.xml``: the ``vm`` element of the server file becomes
    its root, an ``exec`` and a ``filetree`` element are copied over from the
    element named ``s1`` in the scenario file, the server file is cleaned up
    and pretty-printed, and the result is inserted at index 13 of the
    scenario root.  Finally the scenario file is pretty-printed and passed
    through ``unescape``.

    ``newName`` and ``extraServers`` are accepted but not used by this
    function.
    """
    # Define the parser
    parser = etree.XMLParser(remove_blank_text=True)
    # Define the files
    globalFile = ET.parse("/mnt/tmp/pc2/pc2.xml")
    baseServerFile = ET.parse("/mnt/tmp/pc2/s4.xml")
    # Clean new server xml
    baseServerFile._setroot(baseServerFile.find('/vm'))
    # Add needed exec and filetree
    baseServerFile.getroot().append(
        globalFile.findall('.//*[@name="s1"]/exec')[1])
    baseServerFile.getroot().append(
        globalFile.findall('.//*[@name="s1"]/filetree')[0])
    baseServerFile.write("/mnt/tmp/pc2/s4.xml")

    # Second adaptation
    baseServerFile = etree.parse("/mnt/tmp/pc2/s4.xml", parser)
    etree.cleanup_namespaces(baseServerFile)
    baseServerFile.write("/mnt/tmp/pc2/s4.xml", pretty_print=True)

    # Add server to scenario definition
    baseServerFile = ET.parse("/mnt/tmp/pc2/s4.xml")
    globalFile.getroot().insert(13, baseServerFile.getroot())
    globalFile.write("/mnt/tmp/pc2/pc2.xml")
    globalFile2 = etree.parse("/mnt/tmp/pc2/pc2.xml", parser)
    globalFile2.write("/mnt/tmp/pc2/pc2.xml", pretty_print=True)
    # Context managers make sure both handles are closed, even on error.
    with open("/mnt/tmp/pc2/pc2.xml", "r+") as pc2_in:
        pc2_str = unescape(pc2_in.read())
    with open("/mnt/tmp/pc2/pc2.xml", "w+") as pc2_out:
        pc2_out.write(pc2_str)
Ejemplo n.º 10
0
 def generate(self, **kw):
     """Render the document tree as a UTF-8 decoded XML string.

     Keyword Args:
         pretty_print: Indent the output (default ``True``).
         xml_declaration: Emit the XML declaration (default ``True``).
     """
     pretty = kw.get("pretty_print", True)
     with_declaration = kw.get("xml_declaration", True)

     x_node = self._generate_node(None, self.root)
     etree.cleanup_namespaces(x_node, top_nsmap=namespaces)
     serialized = etree.tostring(
         x_node,
         pretty_print=pretty,
         encoding="utf-8",
         xml_declaration=with_declaration,
     )
     return serialized.decode('utf-8')
Ejemplo n.º 11
0
def test_nill():
    """A nillable element without a value is rendered with ``xsi:nil``."""
    schema_source = """
        <?xml version="1.0"?>
        <schema xmlns="http://www.w3.org/2001/XMLSchema"
                xmlns:tns="http://tests.python-zeep.org/"
                targetNamespace="http://tests.python-zeep.org/"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                elementFormDefault="qualified">
          <element name="container">
            <complexType>
              <sequence>
                <element name="foo" type="string" nillable="true"/>
              </sequence>
            </complexType>
          </element>
        </schema>
    """
    schema = xsd.Schema(load_xml(schema_source))

    container_element = schema.get_element('ns0:container')
    instance = container_element()
    expected = """
      <document>
        <ns0:container xmlns:ns0="http://tests.python-zeep.org/">
          <ns0:foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true"/>
        </ns0:container>
      </document>
    """
    document = etree.Element('document')
    container_element.render(document, instance)
    etree.cleanup_namespaces(document)

    assert_nodes_equal(expected, document)
Ejemplo n.º 12
0
 def build(cls,
           root_element,
           encoding=None,
           pretty_print=False,
           validate=True):
     """Serialize ``root_element`` as an XML document with a declaration.

     The element produced by ``root_element.to_element()`` is optionally
     validated against ``cls.schema`` and then copied under a new root whose
     namespace map combines the element's mappings with ``cls.nsmap``.

     :param root_element: Instance whose exact type is ``cls.root_element``.
     :param encoding: Output encoding; defaults to ``cls.encoding``.
     :param pretty_print: Whether to indent the output.
     :param validate: Validate against ``cls.schema`` when one is set.
     :raises TypeError: If ``root_element`` has a different type.
     """
     expected_type = cls.root_element
     if type(root_element) is not expected_type:
         raise TypeError(
             "can only build XML documents from root elements of type %s" %
             expected_type.__name__)

     element = root_element.to_element()
     if validate and cls.schema is not None:
         cls.schema.assertValid(element)

     # Cleanup namespaces and move element NS mappings to the global scope.
     # Class-level mappings are applied last, so they win on a prefix clash.
     combined_nsmap = dict(element.nsmap)
     combined_nsmap.update(cls.nsmap)
     normalized_element = etree.Element(
         element.tag, attrib=element.attrib, nsmap=combined_nsmap)
     normalized_element.text = element.text
     normalized_element.tail = element.tail
     for child in element:
         normalized_element.append(deepcopy(child))
     etree.cleanup_namespaces(normalized_element)

     output_encoding = encoding or cls.encoding
     return etree.tostring(normalized_element,
                           encoding=output_encoding,
                           method='xml',
                           xml_declaration=True,
                           pretty_print=pretty_print)
Ejemplo n.º 13
0
 def get_value_def_in_sub_element(cls, element: ElementType) -> typing.Union[ValueDef, ObjectDef]:
     """Build the value definition described by a value sub-element.

     The local tag name selects the kind of definition:

     * ``str`` / ``int`` / ``bool`` / ``float``: a basic value of that
       builtin type, or an evaluated value when the text is an expression.
     * ``list`` / ``dict``: delegated to the matching class helpers.
     * ``ref`` / ``callable``: reference or callable named by the text.
     * ``object``: an evaluated expression or a custom object definition.
     * ``etree``: the first child element, with namespaces cleaned up
       unless the ``cleanup-namespace`` attribute disables it.

     Raises:
         ValueError: if the tag is not one of the supported names.
     """
     tag = etree.QName(element).localname
     if tag in ("str", "int", "bool", "float"):
         type_ = getattr(builtins, tag)
         text = element.text.strip()
         if is_expression(text):
             return EvaluateValueDef(text, type_)
         else:
             return BasicValueDef(text, type_)
     elif tag == "list":
         return cls.get_list_value_def(element)
     elif tag == "dict":
         return cls.get_dict_value_def(element)
     elif tag == "ref":
         return ReferenceValueDef(element.text.strip())
     elif tag == "callable":
         return CallableValueDef(element.text.strip())
     elif tag == "object":
         text = element.text.strip()
         if is_expression(text):
             return EvaluateValueDef(text, object)
         else:
             return cls.get_custom_object_def(element)
     elif tag == "etree":
         if hasattr(element, "nsmap") and get_boolean(element.get("cleanup-namespace", True)):
             etree.cleanup_namespaces(element)
         return ValueDef(list(element)[0])
     else:
         # Name the offending tag so configuration errors can be located.
         raise ValueError("unsupported value element: {!r}".format(tag))
Ejemplo n.º 14
0
Archivo: xml.py Proyecto: buldi/spyne
def get_object_as_xml(value, cls=None, root_tag_name=None, no_namespace=None):
    '''Serialize ``value`` and return the resulting root element.

    :param value: The instance to be serialized.
    :param cls: The class used for serialization. Defaults to
        ``value.__class__``.
    :param root_tag_name: The root tag string, passed through to
        ``xml_object.to_parent_element``.
    :param no_namespace: When true, namespace information is stripped from
        the result. When ``None`` it is enabled only if the class has no
        namespace.
    '''

    if cls is None:
        cls = value.__class__

    namespace = cls.get_namespace()
    if no_namespace is None:
        # Strip namespaces by default only for classes without one.
        no_namespace = namespace is None

    # The object is rendered below a throwaway parent element.
    parent = etree.Element("parent")
    xml_object.to_parent_element(cls, value, namespace, parent, root_tag_name)

    if no_namespace:
        _dig(parent)
        etree.cleanup_namespaces(parent)

    return parent[0]
Ejemplo n.º 15
0
    def asRdfXml(self):
        """Build and return an ``rdf:RDF`` element describing the graph.

        Relations are first gathered per subject.  Blank-node subjects with
        exactly one left-hand side are skipped in that pass.  Each remaining
        subject, in ``_subjectUriOrder`` order, gets a description node that
        is filled by ``serializeDescription``.  Subjects whose description
        has already been removed from the pending map are skipped.
        """
        rdfElement = self.createElement('rdf:RDF', nsmap=self.namespaces)
        pending = defaultdict(lambda: {'types': set(), 'relations': []})
        for (s, p, o) in self.graph.triples():
            if s.startswith('_:') and len(self._leftHandSides(BNode(s))) == 1:
                continue
            self._gatherRelation(pending[s], p, o)

        orderedItems = sorted(pending.items(), key=self._subjectUriOrder)
        orderedSubjects = [subject for subject, _ in orderedItems]
        for subject in orderedSubjects:
            if subject not in pending:
                # Description is no longer pending; nothing left to emit.
                continue
            description = pending.pop(subject)
            tagCurie = self._tagCurieForNode(subject, description)
            if not subject.startswith('_:'):
                attrib = {'rdf:about': subject}
            elif len(self._leftHandSides(BNode(subject))) > 0:
                attrib = {'rdf:nodeID': subject.partition('_:')[-1]}
            else:
                attrib = None
            descriptionNode = self.createSubElement(rdfElement, tagCurie, attrib=attrib)
            self.serializeDescription(descriptionNode, subject, description, pending)
        cleanup_namespaces(rdfElement)
        return rdfElement
Ejemplo n.º 16
0
def parse_xml(xml_file):
    """Demonstrate reading, modifying and saving XML with lxml.objectify.

    Parses ``xml_file``, prints the ``to`` element of the first ``note``
    and the tag/text of every grandchild of the root, changes
    ``root.note.to`` to ``'Guido'``, adds a ``new_element`` child and
    writes the cleaned-up result to ``lxml_output.xml``.
    """
    # Read bytes: lxml rejects unicode strings that carry an XML encoding
    # declaration, and bytes let the parser honour that declaration.
    with open(xml_file, 'rb') as f:
        xml = f.read()

    root = objectify.fromstring(xml)

    # Get an element
    to = root.note.to
    print(f'The {to=}')

    # print out all the note element's tags and text values
    # (iterchildren() replaces the deprecated getchildren())
    for note in root.iterchildren():
        for note_element in note.iterchildren():
            print(f'{note_element.tag=}, {note_element.text=}')
        print()

    # modify a text value
    print(f'Original: {root.note.to=}')
    root.note.to = 'Guido'
    print(f'Modified: {root.note.to=}')

    # add a new element
    root.note.new_element = "I'm new!"

    # cleanup the XML before writing to disk
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    obj_xml = etree.tostring(root, pretty_print=True)

    # save your xml
    with open("lxml_output.xml", "wb") as f:
        f.write(obj_xml)
Ejemplo n.º 17
0
def build_xml(entries, with_attributes=True):
    """Build a ``cityindex`` objectify tree from table rows.

    Each entry must hold exactly 11 items (name, five population figures,
    area, density, development, first year, region).  Entries of any other
    length are logged and skipped.

    :param entries: Iterable of 11-item rows.
    :param with_attributes: When true, every census figure becomes its own
        ``inhabitants`` element tagged with a ``year`` attribute. Otherwise
        only the 2010 figure is stored.
    :returns: The root element, with objectify annotations removed.
    """
    root = objectify.Element('cityindex')
    root.title = 'Index of major German cities'

    census_years = ('1980', '1990', '2000', '2009', '2010')

    for table_entry in entries:
        if len(table_entry) != 11:
            log("Invalid entry, expected 11 items, got %d: %s" % (len(table_entry), table_entry))
            continue
        name, ew1980, ew1990, ew2000, ew2009, ew2010, area, ewkm, ch0010, first, region = table_entry
        entry = etree.SubElement(root, 'entry')
        entry.city = name
        entry.region = region
        entry.country = 'Deutschland'
        entry.area_km2 = area
        if with_attributes:
            entry.inhabitants = [ew1980, ew1990, ew2000, ew2009, ew2010]
            # Tag each figure with its own census year, in list order.
            for index, year in enumerate(census_years):
                entry.inhabitants[index].set('year', year)
        else:
            entry.inhabitants = ew2010
        entry.inhabitants_per_km2 = ewkm
        entry.development_2000_2010 = ch0010.replace(u'\u2212', '-') # fix minus sign
        entry.major_since = first
        entry.description = u'%s ist eine deutsche Großstadt.' % name

    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    return root
Ejemplo n.º 18
0
def bdml_export(beam_list, filename, progress=None):
    """Accepts a list of beam objects and saves it to filename in BDML format.

    :param beam_list: List of ``Beam`` objects, or a list of tuples that
        contain ``Beam`` objects (other tuple members are ignored).
    :param filename: Path of the BDML file to write.
    :param progress: Optional object whose ``setValue`` is called with the
        index of the beam being exported.
    """
    # Is beam_list a list of beam objects or tuples? (Model passes list of
    # tuples).  An empty list has nothing to flatten.
    if beam_list and isinstance(beam_list[0], tuple):
        beam_list = [j for i in beam_list for j in i if isinstance(j, Beam)]

    NSMAP = {'xsi' : "http://www.w3.org/2001/XMLSchema-instance"}
    xml_tree = etree.ElementTree(objectify.Element("{http://www.radpy.org}BDML",
                                                   nsmap=NSMAP))
    xml_root = xml_tree.getroot()
    xml_root.set("{http://www.w3.org/2001/XMLSchema-instance}schemaLocation",
                 "http://www.radpy.org/BDML/BDML.xsd")

    for index, beam in enumerate(beam_list):
        if progress:
            progress.setValue(index)
        # Add a placeholder Beam element, then replace it with the export.
        etree.SubElement(xml_root, "{http://www.radpy.org}Beam")
        xml_root.Beam[-1] = beam.exportXML()

    #Get rid of objectify namespace
    objectify.deannotate(xml_tree)
    etree.cleanup_namespaces(xml_tree)

    # lxml serializes to bytes, so the handle is binary; the context
    # manager closes it even if writing fails.
    with open(filename, 'wb') as out_file:
        xml_tree.write(out_file, pretty_print=True)
Ejemplo n.º 19
0
    def change_disk_owner(self, user_href, name=None, disk_id=None):
        """Change the ownership of an independent disk to a given user.

        :param user_href: Href of the new owner (user).
        :param name: Name of the independent disk.
        :param disk_id: The id of the disk; when given it is used for the
            lookup instead of ``name``.

        :return: The result of the ``put_resource`` call on the disk's
            ``/owner/`` href.
        """
        if self.resource is None:
            self.resource = self.client.get_resource(self.href)

        lookup = {'name': name} if disk_id is None else {'disk_id': disk_id}
        disk = self.get_disk(**lookup)

        # Re-point the existing Owner element at the new user and send it.
        owner_element = disk.Owner
        owner_element.User.set('href', user_href)
        etree.cleanup_namespaces(owner_element)

        owner_href = disk.get('href') + '/owner/'
        return self.client.put_resource(
            owner_href, owner_element, EntityType.OWNER.value)
Ejemplo n.º 20
0
    def addunit(self, unit, new=True):
        """Add ``unit`` to the document.

        For new units, namespace prefixes declared on the unit's element but
        missing from the body are collected, and the tails of the last body
        child and of the unit are adjusted for formatting.  After the
        standard ``addunit``, those namespace definitions are moved to the
        top <resources> element.
        """
        top_nsmap = {}
        needs_cleanup = False
        if new:
            top_nsmap = self.body.nsmap
            for prefix, uri in unit.xmlelement.nsmap.items():
                if prefix in top_nsmap:
                    continue
                needs_cleanup = True
                top_nsmap[prefix] = uri

            # Fixup formatting
            if len(self.body):
                self.body[-1].tail = '\n    '
            unit.xmlelement.tail = '\n'

        super(AndroidResourceFile, self).addunit(unit, new)
        # Move aliased namespaces to the <resources> tag
        # The top_nsmap was introduced in LXML 3.5.0
        if needs_cleanup:
            etree.cleanup_namespaces(self.body, top_nsmap=top_nsmap)
Ejemplo n.º 21
0
    def egress(self, envelope, http_headers, operation, binding_options):
        """Add the WS-Addressing headers to the outgoing envelope.

        ``Action`` (falling back to the SOAP action), ``MessageID`` (a fresh
        ``urn:uuid:``) and ``To`` are appended to the SOAP header.  The
        namespaces of the whole envelope are then cleaned up, moving the
        ``wsa`` declaration to the top where lxml supports it.

        Returns:
            The ``(envelope, http_headers)`` pair.
        """
        wsa_action = operation.input.abstract.wsa_action or operation.soapaction

        header = get_or_create_header(envelope)
        header.extend([
            WSA.Action(wsa_action),
            WSA.MessageID('urn:uuid:' + str(uuid.uuid4())),
            WSA.To(binding_options['address']),
        ])

        cleanup_options = {}
        # the top_nsmap kwarg was added in lxml 3.5.0
        if etree.LXML_VERSION[:2] >= (3, 5):
            cleanup_options['top_nsmap'] = {
                'wsa': 'http://www.w3.org/2005/08/addressing'
            }
        etree.cleanup_namespaces(envelope, **cleanup_options)

        return envelope, http_headers
Ejemplo n.º 22
0
def save_game_state(path, game_state):
    """Write the grid of ``game_state`` to ``path`` as an XML document.

    The document is a ``<data>`` root holding one ``<board>`` element with
    one ``<row>`` per grid row.  Each row contains its cells formatted with
    ``str.format`` and followed by a single space.

    Prints ``'Saved!'`` once the file has been written.  An ``IOError``
    while writing is reported on stdout instead of being raised.
    """
    def create_board_structure(grid):
        # One text line per grid row; every cell is followed by a space.
        board = objectify.Element('board')
        board.row = [
            ''.join('{} '.format(cell) for cell in row) for row in grid
        ]
        return board

    parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
    xml = '''<data></data>'''
    root = objectify.fromstring(xml, parser=parser)
    board = create_board_structure(game_state.grid)
    root.append(board)

    objectify.deannotate(root)
    etree.cleanup_namespaces(root)

    obj_xml = etree.tostring(root, pretty_print=True, xml_declaration=False)

    try:
        with open(path, "wb") as xml_writer:
            xml_writer.write(obj_xml)
    except IOError as err:
        # Still best effort, but do not claim success when nothing was saved.
        print('Could not save game state to {}: {}'.format(path, err))
    else:
        print('Saved!')
Ejemplo n.º 23
0
 def save(self, fileName=None):
     '''Save this POM to a file.

     The document is first written to ``<fileName>.tmp``.  If the target
     file already exists it is renamed to ``<fileName>.bak``, and the
     temporary file is then renamed to the target name.

     :param fileName: Target path; defaults to ``self.pomFile``.
     '''
     if not fileName:
         fileName = self.pomFile

     tmp = '%s.tmp' % fileName
     bak = '%s.bak' % fileName

     # dirname() is '' for a bare file name, and os.makedirs('') raises,
     # so only create the directory when there is one to create.
     parentDir = os.path.dirname(tmp)
     if parentDir and not os.path.exists(parentDir):
         os.makedirs(parentDir)

     if os.path.exists(tmp):
         os.remove(tmp)

     objectify.deannotate(self.xml)
     etree.cleanup_namespaces(self.xml)

     # Drop an empty <dependencies> element before writing.
     deps = getattr(self.project, 'dependencies', None)
     if deps is not None and len(deps) == 0:
         self.project.dependencies.remove()

     self.xml.write(tmp, encoding="UTF-8", pretty_print=True)

     if os.path.exists(fileName):
         os.rename(fileName, bak)

     os.rename(tmp, fileName)
Ejemplo n.º 24
0
        def postprocess(ret):
            """Postprocess the final (single-item) result.

            Unwraps a ``clufter:snippet`` root, turns every
            ``clufter:comment`` element into a real XML comment, and returns
            a re-parsed copy with unused namespace declarations removed.
            """
            assert len(ret) == 1
            ret = ret[0]
            root = ret.getroot()
            if root.tag == namespaced(CLUFTER_NS, 'snippet'):
                ret = root[0]

            # any "protected" comments are turned into full-fledged ones now
            protected_comments = ret.xpath(
                "//clufter:comment", namespaces={'clufter': CLUFTER_NS})
            for placeholder in protected_comments:
                real_comment = etree.Comment(placeholder.text)
                element_juggler.rebind(real_comment,
                                       element_juggler.grab(placeholder))
                element_juggler.drop(placeholder)

            # XXX: ugly solution to get rid of the unneeded namespace
            # (cleanup_namespaces did not work here)
            reparsed = etree.fromstring(etree.tostring(ret),
                                        parser=etree_parser_safe)
            etree.cleanup_namespaces(reparsed)
            return reparsed
Ejemplo n.º 25
0
def get_object_as_xml(inst, cls=None, root_tag_name=None, no_namespace=False):
    """Serialize ``inst`` and return the resulting root element.

    :param inst: The instance to be serialized.
    :param cls: The class used for serialization. Defaults to
        ``inst.__class__``.
    :param root_tag_name: The root tag string, passed through to
        ``xml_object.to_parent``.
    :param no_namespace: When true, namespace information is discarded.
        An explicit ``None`` enables it only if the class has no namespace.
    """

    if cls is None:
        cls = inst.__class__

    namespace = cls.get_namespace()
    if no_namespace is None:
        # Strip namespaces by default only for classes without one.
        no_namespace = namespace is None

    # The object is rendered below a throwaway parent element.
    parent = etree.Element("parent")
    xml_object.to_parent(None, cls, inst, parent, namespace, root_tag_name)

    if no_namespace:
        _dig(parent)
        etree.cleanup_namespaces(parent)

    return parent[0]
Ejemplo n.º 26
0
def parseXML(xmlfile,entry,value):
    """Assign ``value`` to the attribute path ``entry`` and rewrite the file.

    ``xmlfile`` is parsed with lxml.objectify, ``root.<entry>`` is assigned
    through ``exec`` (first with the space-joined ``value``, then, if that
    fails, with ``str(value)``), the objectify annotations are removed and
    the pretty-printed document is written back to ``xmlfile``.
    """
    # Bytes in, bytes out: lxml serializes to bytes, and parsing bytes lets
    # it honour an XML encoding declaration.
    with open(xmlfile, 'rb') as f:
        xml = f.read()

    root = objectify.fromstring(xml)

    # SECURITY NOTE(review): exec runs ``entry`` and ``value`` as Python
    # source. Only call this with trusted input, or replace it with an
    # explicit attribute assignment.
    target = 'root.' + entry
    try:
        exec("%s = %s" % (target, str(' '.join(value))))
    except Exception:
        exec("%s = %s" % (target, str(value)))

    # remove the py:pytype stuff
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    obj_xml = etree.tostring(root, pretty_print=True)

    # save your xml
    with open(xmlfile, 'wb') as f:
        f.write(obj_xml)
Ejemplo n.º 27
0
def parseXML(xmlFile):
    """Parse the XML file, modify it and save the result to ``new.xml``.

    Walk-through of lxml.objectify: reads attributes and child elements of
    the first ``appointment``, prints each child's tag and text, changes
    ``begin``, adds ``new_element``, strips the objectify annotations and
    writes the pretty-printed document to ``new.xml``.
    """
    # Read bytes so the parser can honour an XML encoding declaration.
    with open(xmlFile, "rb") as f:
        xml = f.read()

    root = objectify.fromstring(xml)

    # returns attributes in element node as dict
    attrib = root.attrib

    # how to extract element data
    begin = root.appointment.begin
    uid = root.appointment.uid

    # loop over elements and print their tags and text
    for e in root.appointment.iterchildren():
        print("%s => %s" % (e.tag, e.text))

    # how to change an element's text
    root.appointment.begin = "something else"
    print(root.appointment.begin)

    # how to add a new element
    root.appointment.new_element = "new data"

    # remove the py:pytype stuff
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    obj_xml = etree.tostring(root, pretty_print=True)
    print(obj_xml)

    # save your xml (tostring() returns bytes, hence the binary mode)
    with open("new.xml", "wb") as f:
        f.write(obj_xml)
def get_paths_from_url(url):
    """Fetch an XML bucket listing and return its keys and next marker.

    The listing at ``url`` is downloaded, namespaces are stripped from the
    element tags, and the text of every ``Key`` child of each ``Contents``
    element is collected.  The bucket name is printed on the first call
    only (tracked through the module-level ``bucket_name_printed`` flag),
    and the URL is printed on every call.

    :returns: ``(paths, next_marker)`` where ``next_marker`` is the text of
        the ``NextMarker`` element, or ``None`` when there is none.
    """
    global bucket_name_printed

    # Download and parse XML listing
    r = requests.get(url)
    root = etree.fromstring(r.text.encode('utf-8'))

    # Remove namespace prefixes (iter() replaces the deprecated
    # getiterator()).
    for elem in root.iter():
        try:
            elem.tag = etree.QName(elem).localname
        except ValueError:
            # Comments and processing instructions have no qualified name.
            continue

    # Remove unused namespace declarations
    etree.cleanup_namespaces(root)

    # Print bucket name for the first time
    if not bucket_name_printed:
        print('\033[36mGoogle Storage bucket: {}\033[0m'.format(
            root.find('Name').text))
        bucket_name_printed = True

    # Print URL of current page
    print()
    print('\033[32m{}\033[0m'.format(url))

    next_marker = root.find('NextMarker')
    if next_marker is not None:
        next_marker = next_marker.text

    paths = [
        subchild.text
        for child in root.findall('Contents')
        for subchild in child
        if subchild.tag == 'Key'
    ]

    return paths, next_marker
    def render(self, request):
        """Dispatch a WFS request to its capability handler.

        This is a generator: it yields the handler's result (presumably a
        deferred driven by the caller -- confirm) and returns the serialized
        response document.  POST requests always go to the ``Transaction``
        handler; other requests are routed by their ``request`` argument.

        Raises:
            InvalidWfsRequest: for an unsupported capability, or when the
                capability does not allow the HTTP method used.
        """
        # Make sure this fn is always a generator
        yield defer.succeed(True)

        args = {key.lower(): values for key, values in request.args.items()}

        # Assume POST requests should go through the Transaction capability handler
        # for now since that means we only need to read the request.content file-like
        # object once there. This might need to get more complex in the future if
        # we want to support POST for other capabilities in this WFS version.
        if request.method == b'POST':
            capability_handler = self._capability_handlers.get(b'Transaction')
        else:
            request_type = _first(args.get(b'request', ()), None)
            capability_handler = self._capability_handlers.get(
                request_type, None)

            if not capability_handler:
                raise InvalidWfsRequest(
                    "Unsupported capability: {!r}".format(request_type))

            method_name = request.method.decode('utf-8').lower().capitalize()
            if method_name not in capability_handler.methods:
                raise InvalidWfsRequest(
                    "Capability: {!r} not supported via HTTP method: {!r}".
                    format(request_type, request.method))

        response_doc = yield capability_handler.handle(self, request, args)

        request.setHeader('Content-Type', WFS_MIMETYPE)
        request.setResponseCode(code=200)
        etree.cleanup_namespaces(response_doc)
        serialized = etree.tostring(response_doc, pretty_print=True)
        return serialized
Ejemplo n.º 30
0
def create_structmap(workspace, filesec, filelist, type_attr=None,
                     root_type=None):
    """Build a METS element tree containing the structural map.

    :param workspace: directory passed to the metadata-reference and div
        helpers
    :param filesec: fileSec element
    :param filelist: Sorted list of digital objects (file paths)
    :param type_attr: TYPE attribute of structMap element
    :param root_type: TYPE attribute of root div element; ``'directory'``
        is used when it is not given
    :returns: ElementTree whose root is the mets element holding the
        structural map
    """
    amdids = get_md_references(workspace, directory='.')
    dmdids = get_md_references(workspace, directory='.', ref_type='dmd')

    # The root div always carries the metadata references; only its TYPE
    # (and, for physical directory maps, its label) differs.
    div_options = {'dmdid': dmdids, 'admid': amdids}
    if type_attr == 'Directory-physical':
        div_options.update(type_attr='directory', label='.')
    else:
        div_options['type_attr'] = root_type or 'directory'
    container_div = mets.div(**div_options)

    structmap = mets.structmap(type_attr=type_attr)
    structmap.append(container_div)
    create_div(workspace, div_structure(filelist), container_div, filesec,
               filelist, type_attr=type_attr)

    mets_element = mets.mets(child_elements=[structmap])
    ET.cleanup_namespaces(mets_element)
    return ET.ElementTree(mets_element)
Ejemplo n.º 31
0
Archivo: xml.py Proyecto: b8va/everest
 def run(self, data_element):
     """Write ``data_element`` to the stream as pretty-printed XML text.

     Objectify annotations and unused namespace declarations are removed
     from the element (in place) before it is serialized.
     """
     objectify.deannotate(data_element)
     etree.cleanup_namespaces(data_element)
     self._stream.write(
         etree.tostring(data_element,
                        encoding=text_type,
                        pretty_print=True))
Ejemplo n.º 32
0
def SOAPtoWPS(tree):
    """Transform a SOAP request element into a serialized WPS request.

    The process identifier is the part of the root tag after the first
    underscore. It is looked up among the processes of a freshly built
    GetCapabilities request and the match is stored in the module-level
    global ``process``. The tree is then run through the ``SOAP2WPS.xsl``
    stylesheet and serialized.

    :param tree: root element of the SOAP request
    :returns: the serialized WPS request document
    :raises pywps.NoApplicableCode: if the instance has no such process
    """
    # NOTE:
    # The etree output of ComplexData will not contain the OWS/WPS/XSI
    # namespaces, since those are declared in the head of the WPS:Execute.
    # XSI is not necessary in the WPS:Execute, so it was removed there and
    # now lives inside the ComplexInput (if necessary).
    # An input should not contain elements in the OWS/WPS namespaces, but a
    # hack allows for their presence: in the XSL file the WPS/OWS namespaces
    # differ from those of the ComplexInput, e.g. http://REPLACEME/wps/1.0.0
    # When the tree is serialized, REPLACEME is substituted by
    # www.opengis.net, which yields the correct namespaces for DOM parsing.
    # The substitution is limited to the first 2 occurrences, so it is
    # independent of the size of the XML content.
    global process
    from pywps import processes

    processID = tree.tag.split("_", 1)[-1]
    wps2 = pywps.Pywps()
    wps2.inputs = {'request': 'getCapabilities', 'version': '1.0.0', 'service': 'wps'}
    from pywps.Wps import Request

    request = Request(wps2)
    try:
        # A separate loop variable keeps the global untouched until a
        # match has actually been found.
        process = [candidate for candidate in request.processes
                   if candidate.identifier == processID][0]
    except IndexError:
        # If the server url is incorrect the requested process will not be
        # found in the WPS process list
        raise pywps.NoApplicableCode("The requested process is not part of the instance. Check pywps conf file and WSDL. WSDL has to point to the correct wrapper, please check location attribute in address element of WSDL document")

    # Close the stylesheet file as soon as it has been parsed.
    with open(pywps.XSLT.__path__[0] + "/SOAP2WPS.xsl", "r") as XSLTDocIO:
        XSLTDoc = etree.parse(XSLTDocIO)

    transformer = etree.XSLT(XSLTDoc)
    WPSTree = transformer(tree)
    etree.cleanup_namespaces(WPSTree)

    XMLOut = etree.tostring(WPSTree)
    XMLOut = re.sub(r'REPLACEME', "www.opengis.net", XMLOut, 2)
    return XMLOut
Ejemplo n.º 33
0
 def run(self, data_element):
     """Write a pretty-printed serialization of *data_element* to the stream.

     The element is modified in place first: objectify annotations are
     dropped and unused namespace declarations are cleaned up.
     """
     objectify.deannotate(data_element)
     etree.cleanup_namespaces(data_element)
     serialized = etree.tostring(
         data_element, encoding=text_type, pretty_print=True)
     self._stream.write(serialized)
Ejemplo n.º 34
0
def download():
    """Store metadata and shapefiles for the items from ``get_items()``.

    Items without a ``MapaDigital`` id (or with id "0") are skipped, as are
    items whose metadata file already exists. For every other item the
    element is written as pretty-printed XML to ``<data_dir>/<id>.xml``;
    if its ``UrlZip`` answers with a zip content type, the payload is saved
    next to it as ``<id>.zip`` and a progress line is printed.
    """
    for count, item in enumerate(get_items(), 1):
        item_id = item.find('./{http://tempuri.org/}MapaDigital').text
        if not item_id or item_id == "0":
            continue

        data_dir = 'data' + ptree.id2ptree(item_id)
        if not os.path.isdir(data_dir):
            os.makedirs(data_dir)

        # no need to refetch
        metadata_file = os.path.join(data_dir, "%s.xml" % item_id)
        if os.path.isfile(metadata_file):
            continue

        # write out metadata; tostring() yields bytes, hence binary mode
        et.cleanup_namespaces(item)
        xml = et.tostring(item, pretty_print=True)
        with open(metadata_file, "wb") as metadata_fh:
            metadata_fh.write(xml)

        # be nice :)
        time.sleep(1)

        # try to download shapefile
        zip_url = item.find('./{http://tempuri.org/}UrlZip').text
        if zip_url:
            r = requests.get(zip_url, headers={"User-Agent": UA})
            # .get() so a response without Content-Type is simply ignored
            if r.headers.get('Content-Type') == 'application/x-zip-compressed':
                zip_file = metadata_file.replace(".xml", ".zip")
                with open(zip_file, "wb") as zip_fh:
                    zip_fh.write(r.content)
                print("%s %s %s %s" % (datetime.datetime.now(), count, item_id, zip_file))
Ejemplo n.º 35
0
def createAndSaveTripXML(data, filename):
    """
       Create an XML file with one child of ``<trips>`` per entry in data.

       :param data: iterable of trips; each one is converted with
                    ``generateTripElement`` and appended to the root
       :param filename: path of the XML file to write

       A failure to write the file is reported on stdout and not raised.
    """
    root = objectify.fromstring(
        b'<?xml version="1.0" encoding="UTF-8"?><trips></trips>')

    for trip in data:
        root.append(generateTripElement(trip))

    # remove lxml annotation
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)

    # create the xml string
    obj_xml = etree.tostring(root,
                             pretty_print=True,
                             xml_declaration=True,
                             encoding="utf-8")

    try:
        # the with-block closes the file; no explicit close() is needed
        with open(filename, "wb") as xml_writer:
            xml_writer.write(obj_xml)
    except IOError as err:
        # still best-effort, but no longer silent
        print("Could not write {}: {}".format(filename, err))
    else:
        print("{} created successfully!".format(filename))
Ejemplo n.º 36
0
def adaptLogsServerXML(newName="logs"):
    """Derive the ``logs`` server definition from s4.xml and add it to pc2.xml.

    Steps, all on files below /mnt/tmp/pc2:
      1. copy s4.xml to logs.xml, rename the root to 'logs' and set the
         addresses of its first two ``if/ipv4`` elements;
      2. re-parse logs.xml with lxml to clean namespaces and pretty-print;
      3. insert the logs root into pc2.xml at child index 14, pretty-print
         the file and finally unescape its contents.

    :param newName: currently unused; the name 'logs' is hard-coded.
    """
    # Define the parser
    parser = etree.XMLParser(remove_blank_text=True)

    # Edit the logs.xml file
    call("cp /mnt/tmp/pc2/s4.xml /mnt/tmp/pc2/logs.xml", shell=True)
    baseServerFile = ET.parse("/mnt/tmp/pc2/logs.xml")
    server_root = baseServerFile.getroot()
    server_root.attrib['name'] = 'logs'
    interfaces = server_root.findall('./if/ipv4')
    interfaces[0].text = '20.20.3.15/24'
    interfaces[1].text = '20.20.4.15/24'
    baseServerFile.write("/mnt/tmp/pc2/logs.xml")
    print(server_root.attrib['name'])

    # Second adaptation
    baseServerFile = etree.parse("/mnt/tmp/pc2/logs.xml", parser)
    etree.cleanup_namespaces(baseServerFile)
    baseServerFile.write("/mnt/tmp/pc2/logs.xml", pretty_print=True)

    # Add server to scenario definition
    globalFile = ET.parse("/mnt/tmp/pc2/pc2.xml")
    baseServerFile = ET.parse("/mnt/tmp/pc2/logs.xml")
    globalFile.getroot().insert(14, baseServerFile.getroot())
    globalFile.write("/mnt/tmp/pc2/pc2.xml")
    globalFile2 = etree.parse("/mnt/tmp/pc2/pc2.xml", parser)
    globalFile2.write("/mnt/tmp/pc2/pc2.xml", pretty_print=True)

    # Unescape the file in two steps so that both handles are closed
    # deterministically, the read handle before the file is truncated.
    with open("/mnt/tmp/pc2/pc2.xml", "r") as scenario_in:
        pc2_str = unescape(scenario_in.read())
    with open("/mnt/tmp/pc2/pc2.xml", "w") as scenario_out:
        scenario_out.write(pc2_str)
Ejemplo n.º 37
0
def parseXML(xmlFile):
    """Parse the XML file, modify it and save the result as ``new.xml``.

    The document is expected to have an ``appointment`` child with
    ``begin`` and ``uid`` elements. The children of the appointment are
    printed, ``begin`` is overwritten, a ``new_element`` is added, and the
    de-annotated result is printed and written to ``new.xml``.
    """
    # Read bytes: lxml refuses unicode strings that carry an XML encoding
    # declaration, which is what a text-mode read yields on Python 3.
    with open(xmlFile, "rb") as f:
        xml = f.read()

    root = objectify.fromstring(xml)

    # returns attributes in element node as dict
    attrib = root.attrib

    # how to extract element data
    begin = root.appointment.begin
    uid = root.appointment.uid

    # loop over elements and print their tags and text
    for e in root.appointment.iterchildren():
        print("%s => %s" % (e.tag, e.text))

    # how to change an element's text
    root.appointment.begin = "something else"
    print(root.appointment.begin)

    # how to add a new element
    root.appointment.new_element = "new data"

    # remove the py:pytype stuff
    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    obj_xml = etree.tostring(root, pretty_print=True)
    print(obj_xml)

    # save your xml; tostring() returns bytes, so write in binary mode
    with open("new.xml", "wb") as f:
        f.write(obj_xml)
Ejemplo n.º 38
0
    def change_disk_owner(self, user_href, name=None, disk_id=None):
        """Transfer ownership of an independent disk to another user.

        :param user_href: Href of the new owner (user).
        :param name: Name of the independent disk; only used for the lookup
            when ``disk_id`` is None.
        :param disk_id: The id of the disk (required if there are multiple
            disks with same name).

        :return: The result of the ``put_resource`` call that submits the
            updated owner element.

        :raises: Exception: If the named disk cannot be located or some
            other error occurs.
        """
        if self.resource is None:
            self.resource = self.client.get_resource(self.href)

        # The id takes precedence over the name when both are given.
        lookup = {'name': name} if disk_id is None else {'disk_id': disk_id}
        disk = self.get_disk(**lookup)

        owner = disk.Owner
        owner.User.set('href', user_href)
        etree.cleanup_namespaces(owner)

        owner_href = disk.get('href') + '/owner/'
        return self.client.put_resource(
            owner_href, owner, EntityType.OWNER.value)
Ejemplo n.º 39
0
    def serialize(self, *args, **kwargs):
        """Build the SOAP envelope for this message.

        The keyword ``_soapheaders`` is consumed here and handed to
        ``self._serialize_header``; all remaining arguments are forwarded
        to ``self.body`` to build the body value.

        Returns a SerializedMessage whose content is the envelope, with the
        SOAPAction HTTP header set and ``path`` left as None.
        """
        nsmap = self.nsmap.copy()
        nsmap.update(self.wsdl.types._prefix_map)
        soap = ElementMaker(namespace=self.nsmap['soap-env'], nsmap=nsmap)

        # soap:header -- _soapheaders has to leave kwargs before the body
        # is built from them below.
        header = self._serialize_header(
            kwargs.pop('_soapheaders', None), nsmap)

        # soap:body
        body = None
        if self.body:
            body_value = self.body(*args, **kwargs)
            body = soap.Body()
            self.body.render(body, body_value)

        # soap:envelope, holding whichever parts were produced
        envelope = soap.Envelope()
        for part in (header, body):
            if part is not None:
                envelope.append(part)

        # XXX: This is only used in Soap 1.1 so should be moved to the the
        # Soap11Binding._set_http_headers(). But let's keep it like this for
        # now.
        http_headers = {'SOAPAction': '"%s"' % self.operation.soapaction}

        etree.cleanup_namespaces(envelope)
        return SerializedMessage(
            path=None, headers=http_headers, content=envelope)
Ejemplo n.º 40
0
def test_nill():
    """An unset nillable element is rendered with ``xsi:nil="true"``."""
    schema = xsd.Schema(load_xml("""
        <?xml version="1.0"?>
        <schema xmlns="http://www.w3.org/2001/XMLSchema"
                xmlns:tns="http://tests.python-zeep.org/"
                targetNamespace="http://tests.python-zeep.org/"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                elementFormDefault="qualified">
          <element name="container">
            <complexType>
              <sequence>
                <element name="foo" type="string" nillable="true"/>
              </sequence>
            </complexType>
          </element>
        </schema>
    """))

    container_elm = schema.get_element('ns0:container')
    # No value is given for ``foo``.
    value = container_elm()

    expected = """
      <document>
        <ns0:container xmlns:ns0="http://tests.python-zeep.org/">
          <ns0:foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:nil="true"/>
        </ns0:container>
      </document>
    """
    document = etree.Element('document')
    container_elm.render(document, value)
    etree.cleanup_namespaces(document)

    assert_nodes_equal(expected, document)
Ejemplo n.º 41
0
    def emit(self, ctx, modules, fd):
        """Rewrite the namespaces of the XML file named by ``ctx.opts.xml``.

        The first node with local name ``native`` is passed to
        ``replace_namespaces`` together with the loaded
        ``Cisco-IOS-XE-native`` module, and the resulting document is
        pretty-printed to *fd*.

        :param ctx: context whose ``opts.xml`` holds the path of the XML file
        :param modules: loaded modules, searched by ``i_modulename``
        :param fd: writable text stream receiving the output

        Exits with status 1 when no XML file is given, the document has no
        ``native`` node, or the native module is not loaded. Any other
        failure is reported on *fd* without raising.
        """
        if not ctx.opts.xml:
            fd.write('No XML file!\n')
            sys.exit(1)
        try:
            # Parse inside a with-block so the file handle is released.
            with open(ctx.opts.xml, 'rb') as xml_file:
                doc = etree.parse(xml_file)
            native = doc.xpath('//*[local-name()="native"]')
            if not native:
                print('No node \'native\'!!')
                sys.exit(1)

            # find the Cisco-IOS-XE-native module
            module_native = next(
                (m for m in modules
                 if m.i_modulename == 'Cisco-IOS-XE-native'),
                None)
            if not module_native:
                print('Cisco-IOS-XE-native not loaded!')
                sys.exit(1)

            # replace all namespaces from XML doc
            replace_namespaces(module_native, [], native[0])

            # cleanup the namespaces for display
            etree.cleanup_namespaces(doc)

            # print the modified doc
            fd.write(etree.tostring(doc, pretty_print=True).decode())
            fd.write('\n')
        except Exception:
            # Deliberately not a bare except: SystemExit is not an Exception
            # subclass, so the sys.exit(1) calls above really terminate
            # instead of being reported as a processing failure.
            fd.write('\n')
            fd.write('Failed to process file\n')
Ejemplo n.º 42
0
def parse_with_region(person_list_file="../personlist.xml", city_index_file="../cityindex.xml"):
    """
    Parse the person list and augment it with region information.

    For every person a copy of the region found in the city index for the
    person's city and country is inserted right after the city element; an
    empty ``region`` element is used when the index has no match. Birthdays
    in December are moved to April 1st. Returns the processed tree.
    """
    tree = objectify.parse(person_list_file)
    city_index = objectify.parse(city_index_file).getroot().entry

    people = tree.getroot()
    empty_region = people.makeelement("region")

    for person in people.person:
        # find city and country of each person
        address = person.address
        city = address.city
        country = address.country

        # first index entry matching both city and country, if any
        matching_regions = (entry.region for entry in city_index
                            if entry.city == city and entry.country == country)

        # insert region tag after city tag
        city.addnext(deepcopy(next(matching_regions, empty_region)))

        # change birth date to April 1st if born in December
        birthday = person.birthday
        if birthday.month == "December":
            birthday.day = 1
            birthday.month = "April"
            birthday.month.set("number", "4")

    # return processed tree
    objectify.deannotate(tree)
    etree.cleanup_namespaces(tree)
    return tree
Ejemplo n.º 43
0
    def clean_tree(self, root):
        """Strip the namespace from every element tag below *root*, in place.

        Only element nodes are visited: comments and processing
        instructions have no qualified name, so ``etree.QName`` cannot be
        applied to them. Namespace declarations that end up unused are
        removed afterwards.

        :param root: lxml element (or tree) to clean
        :returns: the same *root* object
        """
        # iter() replaces the deprecated getiterator(); the Element filter
        # restricts the walk to real elements.
        for elem in root.iter(etree.Element):
            elem.tag = etree.QName(elem).localname

        etree.cleanup_namespaces(root)

        return root
Ejemplo n.º 44
0
    def testAddInitialRecord(self):
        uri = "some:uri"

        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
        <prov:Entity>
            <dcterms:source rdf:resource="http://first.example.org"/>
        </prov:Entity>
    </prov:wasDerivedFrom>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))

        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=uri)
        expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF')))
        cleanup_namespaces(expected)
        self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf'))

        self.assertEquals(set(['rdf']), record.prefixes)
        self.assertEquals(set(), record.sets)

        self.plein.close()
        plein2 = self._newPlein()
        self.assertEquals(['some:uri'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier')])
Ejemplo n.º 45
0
    def serialize(self, sink: Union[IO, str, None], cas: Cas, pretty_print=True) -> Union[str, None]:
        """Serialize *cas* as an XMI document.

        Args:
            sink: Target handed to ``ElementTree.write``; when it is None
                the document is returned as a string instead.
            cas: The CAS whose feature structures, sofas and views are
                serialized.
            pretty_print: Whether the XML output is pretty-printed.

        Returns:
            The UTF-8 decoded document if *sink* is None, otherwise None.
        """
        root = etree.Element(
            etree.QName(self._nsmap["xmi"], "XMI"),
            nsmap=self._nsmap,
            **{"{http://www.omg.org/XMI}version": "2.0"}
        )

        self._serialize_cas_null(root)

        # Find all fs, even the ones that are not directly added to a sofa
        ordered_fs = sorted(cas._find_all_fs(), key=lambda a: a.xmiID)
        for fs in ordered_fs:
            self._serialize_feature_structure(cas, root, fs)

        for sofa in cas.sofas:
            self._serialize_sofa(root, sofa)

        for view in cas.views:
            self._serialize_view(root, view)

        doc = etree.ElementTree(root)
        etree.cleanup_namespaces(doc, top_nsmap=self._nsmap)

        if sink is not None:
            doc.write(sink, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
            return None

        # No sink given: serialize into memory and hand back the text.
        buffer = BytesIO()
        doc.write(buffer, xml_declaration=True, pretty_print=pretty_print, encoding="UTF-8")
        return buffer.getvalue().decode("utf-8")
Ejemplo n.º 46
0
        def postprocess(ret):
            """Finalize the single result tree contained in *ret*.

            A wrapping clufter ``snippet`` root is unwrapped to its first
            child, every ``clufter:comment`` element is replaced by a real
            XML comment carrying the same text, and the result is
            re-parsed so that the unneeded namespace disappears.
            """
            #log.debug("Applying postprocess onto {0}".format(etree.tostring(ret)))
            assert len(ret) == 1
            result = ret[0]
            top = result.getroot()
            if top.tag == namespaced(CLUFTER_NS, 'snippet'):
                result = top[0]

            # any "protected" comments are turned into full-fledged ones now
            protected_comments = result.xpath(
                "//clufter:comment", namespaces={'clufter': CLUFTER_NS})
            for protected in protected_comments:
                element_juggler.rebind(etree.Comment(protected.text),
                                       element_juggler.grab(protected))
                element_juggler.drop(protected)

            # XXX: ugly solution to get rid of the unneeded namespace
            # (cleanup_namespaces did not work here)
            result = etree.fromstring(etree.tostring(result),
                                      parser=etree_parser_safe)
            etree.cleanup_namespaces(result)
            return result
Ejemplo n.º 47
0
    def addDependency(self, d):
        """Append a dependency to this POM's ``dependencies`` element.

        *d* may be a ``Dependency``, in which case its ``_pomElement`` is
        used, or any other object, which is appended as given. The element
        is de-annotated and namespace-cleaned in place first.
        """
        element = d._pomElement if isinstance(d, Dependency) else d

        objectify.deannotate(element)
        etree.cleanup_namespaces(element)

        self._pomElement.dependencies.append(element)
Ejemplo n.º 48
0
 def import_metadata(self, destination, model_year):
     """Write a copy of the input metadata with its calendar date replaced.

     The input file is parsed, a deep copy gets ``model_year`` as its
     ``caldate``, and the cleaned tree is written pretty-printed to
     ``<destination>/metadata.xml``.
     """
     parsed = copy.deepcopy(xp.XMLParser(self.input_file))
     parsed.root.idinfo.timeperd.timeinfo.sngdate.caldate = model_year

     tree = parsed.tree
     objectify.deannotate(tree)
     etree.cleanup_namespaces(tree)
     tree.write(destination + '/metadata.xml', pretty_print=True)
Ejemplo n.º 49
0
def build_xmlrequest(request_attributes=None, subelements=None):
    """Build an XML request string

    A ``request`` element is built from the given attributes and
    subelements, wrapped in a ``tellervo`` root, de-annotated and
    serialized.

    :param request_attributes: attributes for the ``request`` element;
        an empty dict when omitted
    :param subelements: children for the ``request`` element; an empty
        list when omitted
    :returns: the serialized ``tellervo`` document
    """
    # Fresh containers per call: mutable default arguments would be shared
    # between all calls if they were ever modified downstream.
    if request_attributes is None:
        request_attributes = {}
    if subelements is None:
        subelements = []

    request_element = build_basic_element('request', request_attributes, subelements)
    root = build_basic_element('tellervo', subelements=[request_element])
    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root)
Ejemplo n.º 50
0
 def run(self, data_element):
     """Write *data_element* to the stream as an XML document.

     The element is de-annotated and namespace-cleaned in place, then
     pretty-printed with an XML declaration in the encoding given by the
     'encoding' option.
     """
     objectify.deannotate(data_element)
     etree.cleanup_namespaces(data_element)
     document = etree.tostring(data_element,
                               pretty_print=True,
                               encoding=self.get_option('encoding'),
                               xml_declaration=True)
     self._stream.write(document)
Ejemplo n.º 51
0
def to_xml(node):
    """Return *node* serialized as pretty-printed XML.

    Namespace cleanup is applied to a deep copy, so *node* itself is left
    untouched.
    """
    import copy

    detached = copy.deepcopy(node)
    etree.cleanup_namespaces(detached)
    return etree.tostring(detached, pretty_print=True)
Ejemplo n.º 52
0
def hr_xml_to_json(xml, business_unit):
    """
    Cleans a job coming from an HR-XML document. This should add any
    required fields, and re-format any fields that are not coming in
    in the required format.

    This function is also used for markdown testing, and should not make
    any changes to the database or solr.

    inputs:
        :xml: an HR-XML document; its tags are stripped of namespaces
              in place
        :business unit: the business unit the job is coming from

    outputs:
        A solr-ready job as a dictionary; None when no company can be
        found for the business unit

    """
    # Clean up the xml document: drop the "{namespace}" prefix of each tag
    for elem in xml.iter():
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i + 1:]
    etree.cleanup_namespaces(xml)

    # Get some useful references
    app = xml.xpath('.//ApplicationArea')[0]
    data = xml.xpath('.//PositionOpening')[0]

    guid = data.find(".//*[@schemeName='juid']").text
    logger.debug("Parsing job %s", guid)

    reqid = data.find(".//*[@schemeName='reqid']").text
    city = data.find('.//CityName').text
    city = city if city not in ['', 'XX'] else None
    state_code = data.find('.//CountrySubDivisionCode').text
    state_short = state_code if state_code in states else None
    state = states.get(state_code, None)
    country_short = data.find('.//CountryCode').text
    if country_short in [None, '', 'XXX']:
        country = country_short = ""
    else:
        country = Country.objects.get(abbrev=country_short).name
    title = data.find('.//PositionTitle').text
    description = data.find('.//PositionFormattedDescription/Content').text
    link = data.find('.//Communication/URI').text

    latitude = data.find('.//SpatialLocation/Latitude').text
    longitude = data.find('.//SpatialLocation/Longitude').text

    # Lookup the company.  (Assumes that company is 1-to-1 on BusinessUnit)
    try:
        company = business_unit.company_set.all()[0]
    except (IndexError, Company.DoesNotExist,
            Company.MultipleObjectsReturned):
        # The tuple is required: "except A, B" would catch only A and bind
        # the exception to B.  Indexing an empty queryset raises IndexError.
        logger.error("Unable to find Company for BusinessUnit %s",
                     business_unit)
        return None
Ejemplo n.º 53
0
def hr_xml_to_json(xml, business_unit):
    """
    Cleans a job coming from an HR-XML document. This should add any
    required fields, and re-format any fields that are not coming in
    in the required format.

    This function is also used for markdown testing, and should not make
    any changes to the database or solr.

    inputs:
        :xml: the job document; its tags are stripped of namespaces
              in place
        :business unit: the business unit the job is coming from

    outputs:
        A solr-ready job as a dictionary; None when no company can be
        found for the business unit

    """
    # Clean up the xml document: drop the "{namespace}" prefix of each tag
    for elem in xml.iter():
        i = elem.tag.find('}')
        if i >= 0:
            elem.tag = elem.tag[i + 1:]
    etree.cleanup_namespaces(xml)

    # Get some useful references

    guid = xml.find("./jobuuid").text
    logger.debug("Parsing job %s", guid)

    reqid = xml.find("./reqid").text
    city = xml.find('.//locations/loc[@type="primary"]/city').text
    city = city if city not in ['', 'XX'] else None
    state_code = xml.find('./locations/loc[@type="primary"]/state').text
    state_short = state_code if state_code in states else None
    state = states.get(state_code, None)
    country_short = xml.find('./locations/loc[@type="primary"]/country').text
    if country_short in [None, '', 'XXX']:
        country = country_short = ""
    else:
        country = Country.objects.get(abbrev=country_short).name
    # NOTE(review): unlike its neighbours this keeps the element, not its
    # text -- confirm that is intended.
    zipcode = xml.find('./locations/loc[@type="primary"]/postalcode')
    title = xml.find('./jobtitle').text
    description = xml.find('./jobdescription').text
    link = xml.find('./joburl').text

    latitude = xml.find('./locations/loc[@type="primary"]/latitude').text
    longitude = xml.find('./locations/loc[@type="primary"]/longitude').text

    # Lookup the company.  (Assumes that company is 1-to-1 on BusinessUnit)
    try:
        company = business_unit.company_set.all()[0]
    except (IndexError, Company.DoesNotExist,
            Company.MultipleObjectsReturned):
        # The tuple is required: "except A, B" would catch only A and bind
        # the exception to B.  Indexing an empty queryset raises IndexError.
        logger.error("Unable to find Company for BusinessUnit %s",
                     business_unit)
        return None
Ejemplo n.º 54
0
    def write_tree(self, file_name):
        """Validate the tree against the schema and write it to *file_name*.

        Objectify annotations and unused namespace declarations are removed
        from ``self.tree`` in place before validation.
        """
        tree = self.tree

        # Strip annotations so they do not end up in the output
        objectify.deannotate(tree)
        etree.cleanup_namespaces(tree)

        # The cleaned XML has to validate against the schema
        utilities.validate_xml(tree, self.xml_schema_file)

        tree.write(file_name, pretty_print=True)
Ejemplo n.º 55
0
def create_xml(*functions):
    """Build a ``<root>`` element from the results of factory callables.

    :param functions: callables returning an element, or (nested) lists of
        such callables; every callable is invoked without arguments
    :returns: the de-annotated root with all results appended in order
    """
    def _call_factories(item, collected):
        # Lists are flattened recursively; anything else is called.
        if isinstance(item, list):
            for sub_item in item:
                _call_factories(sub_item, collected)
        else:
            collected.append(item())

    # Bytes literal: lxml rejects unicode strings that carry an encoding
    # declaration.
    root = objectify.fromstring(
        b'<?xml version="1.0" encoding="UTF-8"?><root></root>')

    # Explicit loops instead of map(): on Python 3 map() is lazy, so using
    # it for side effects would append nothing at all.
    nodes = []
    for item in functions:
        _call_factories(item, nodes)
    for node in nodes:
        root.append(node)

    objectify.deannotate(root)
    etree.cleanup_namespaces(root)
    return root
Ejemplo n.º 56
0
 def _clone_without_own_ns(self):
     """Return a clone with all elements and attributes in ``NS`` removed."""
     clone = self.clone()

     # Drop every element that lives in the namespace itself.
     for own_element in clone.xpath("//meld:*", namespaces={"meld": NS}):
         own_element.getparent().remove(own_element)

     # Drop namespaced attributes from the remaining elements.
     attr_query = "//*[@*[namespace-uri()='{}']]".format(NS)
     for carrier in clone.xpath(attr_query):
         # iterate over a snapshot, attributes are deleted on the way
         for attr_name in list(carrier.attrib.keys()):
             if etree.QName(attr_name).namespace == NS:
                 del carrier.attrib[attr_name]

     etree.cleanup_namespaces(clone)
     return clone
Ejemplo n.º 57
0
def remove_namespaces(xml):
    """Strip the ``{namespace}`` prefix from every element tag, in place.

    Comments and processing instructions are left alone: their ``tag`` is
    a factory function rather than a string. Namespace declarations that
    end up unused are removed afterwards.

    :param xml: lxml element or tree to clean
    :returns: the same *xml* object
    """
    # iter() replaces the deprecated getiterator(); the Element filter
    # skips comments and processing instructions alike.
    for elem in xml.iter(etree.Element):
        i = elem.tag.find('}')
        if i > 0:
            elem.tag = elem.tag[i + 1:]

    etree.cleanup_namespaces(xml)
    return xml
def get_aligned_frames_xml(tokenized, frame_instances, root):
    """Append the frame instances to *root* and return it as XML.

    For every frame instance that has roles, a ``frameinstance`` element
    is added with lexicalizations generated from the DRG of *tokenized*,
    plus one ``frameelement`` per role. Instances whose synset has no
    mapping in ``offset2wn`` are logged and skipped.

    :param tokenized: input handed to ``get_drg``
    :param frame_instances: mapping of instance id to a dict with the keys
        'frame', 'synset', 'variable' and 'roles'
    :param root: objectify element that receives the new children
    :returns: pretty-printed serialization of *root*
    """
    # read DRG
    tuples = get_drg(tokenized)
    drgparser = drg.DRGParser()
    d = drgparser.parse_tup_lines(tuples)

    for instance_id, frame_instance in frame_instances.items():
        if not frame_instance['roles']:
            continue

        try:
            framebase_id = "{0}-{1}".format(
                frame_instance['frame'],
                offset2wn[frame_instance['synset']].split("#")[0].replace('-', '.'))
        except Exception:
            # not a bare except, so KeyboardInterrupt/SystemExit pass through
            log.info('No mapping found for synset {0}'.format(frame_instance['synset']))
            continue

        tag_frameinstance = objectify.SubElement(root, "frameinstance")
        tag_frameinstance.attrib['id'] = instance_id
        tag_frameinstance.attrib['type'] = framebase_id
        tag_frameinstance.attrib['internalvariable'] = frame_instance['variable']

        for reificated_frame_var in d.reificated[frame_instance['variable']]:
            tag_framelexicalization = objectify.SubElement(tag_frameinstance, "framelexicalization")
            surface = []
            unboxer.generate_from_referent(d, reificated_frame_var, surface, complete=False)
            tag_framelexicalization[0] = ' '.join(surface)

            tag_instancelexicalization = objectify.SubElement(tag_frameinstance, "instancelexicalization")
            surface = []
            unboxer.generate_from_referent(d, reificated_frame_var, surface, complete=True)
            tag_instancelexicalization[0] = ' '.join(surface)

            tag_frameelements = objectify.SubElement(tag_frameinstance, "frameelements")
            for role, (variable, filler) in frame_instance['roles'].items():
                tag_frameelement = objectify.SubElement(tag_frameelements, "frameelement")
                tag_frameelement.attrib['role'] = role
                tag_frameelement.attrib['internalvariable'] = variable
                tag_concept = objectify.SubElement(tag_frameelement, "concept")
                tag_concept[0] = filler
                try:
                    for reificated_role_var in d.reificated[variable]:
                        # composed lexicalization
                        surface = unboxer.generate_from_relation(d, reificated_frame_var, reificated_role_var)
                        if surface is not None:
                            tag_rolelexicalization = objectify.SubElement(tag_frameelement, "rolelexicalization")
                            tag_rolelexicalization[0] = surface

                            # complete surface forms
                            surface = []
                            unboxer.generate_from_referent(d, reificated_role_var, surface, complete=True)
                            tag_conceptlexicalization = objectify.SubElement(tag_frameelement, "conceptlexicalization")
                            tag_conceptlexicalization[0] = ' '.join(surface)
                except Exception:
                    # best effort: a role that cannot be lexicalized is
                    # logged and the remaining roles are still processed
                    log.error("error with DRG reification: {0}".format(variable))

    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root, pretty_print=True)