Python parse Examples, defusedxml.lxml.parse Python Examples

Example #1

0

Show file

File: iati.py Project: OpenDataServices/cove

def get_tree(data_file):
    """Parse ``data_file`` as XML and return the parsed tree.

    The file is opened in binary mode and handed to ``etree.parse``. An
    ``lxml.etree.XMLSyntaxError`` or a ``UnicodeDecodeError`` raised while
    parsing is re-raised as ``CoveInputDataError``; its context carries the
    user-facing message and the stringified original error.
    """

    def as_input_error(template, err):
        # Both failure modes share everything except the message template.
        return CoveInputDataError(context={
            'sub_title': _("Sorry, we can't process that data"),
            'link': 'index',
            'link_text': _('Try Again'),
            'msg': _(format_html(template, err)),
            'error': format(err)
        })

    with open(data_file, 'rb') as xml_file:
        try:
            return etree.parse(xml_file)
        except lxml.etree.XMLSyntaxError as err:
            raise as_input_error(
                'We think you tried to upload a XML file, but it is not well formed XML.'
                '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                '</span> <strong>Error message:</strong> {}',
                err)
        except UnicodeDecodeError as err:
            raise as_input_error(
                'We think you tried to upload a XML file, but the encoding is incorrect.'
                '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                '</span> <strong>Error message:</strong> {}',
                err)

Example #2

0

Show file

File: util.py Project: ecederstrand/exchangelib

def to_xml(bytes_content):
    """Convert bytes, or a generator of bytes, to an XML tree.

    Exchange servers may spit out the weirdest XML. lxml is pretty good at
    recovering from errors, so the forgiving parser is used.

    :param bytes_content: a ``bytes`` object (wrapped in ``io.BytesIO``) or
        anything else, which is wrapped in ``BytesGeneratorIO``
    :return: the result of ``parse`` on the wrapped stream
    :raises ParseError: when parsing fails. The second argument is always
        ``'<not from file>'``; line and offset are taken from the underlying
        error when available, otherwise ``-1`` and ``0``.
    """
    if isinstance(bytes_content, bytes):
        stream = io.BytesIO(bytes_content)
    else:
        stream = BytesGeneratorIO(bytes_content)
    forgiving_parser = _forgiving_parser.getDefaultParser()
    try:
        return parse(stream, parser=forgiving_parser)
    except AssertionError as e:
        raise ParseError(e.args[0], '<not from file>', -1, 0)
    except _etree.ParseError as e:
        # Some ParseError flavours expose (line, offset) only as 'position'
        if hasattr(e, 'position'):
            e.lineno, e.offset = e.position
        if not e.lineno:
            raise ParseError(text_type(e), '<not from file>', e.lineno, e.offset)
        try:
            # Re-read the input to quote the text around the failure position
            stream.seek(0)
            offending_line = stream.read().splitlines()[e.lineno - 1]
        except IndexError:
            raise ParseError(text_type(e), '<not from file>', e.lineno, e.offset)
        else:
            offending_excerpt = offending_line[max(0, e.offset - 20):e.offset + 20]
            msg = '%s\nOffending text: [...]%s[...]' % (text_type(e), offending_excerpt)
            # Pass the same four arguments as every other raise in this function;
            # omitting the source name shifted lineno/offset into the wrong slots.
            raise ParseError(msg, '<not from file>', e.lineno, e.offset)
    except TypeError:
        stream.seek(0)
        raise ParseError('This is not XML: %r' % stream.read(), '<not from file>', -1, 0)

Example #3

0

Show file

File: models.py Project: titusz/onixcheck

    def xml_tree(self):
        """
        Return the infile parsed as a tree, adding a default namespace if needed.

        The infile is rewound first when it has a ``seek`` method. When
        ``self.meta.namespaces`` is truthy the parsed tree is returned as is.
        Otherwise a new root carrying the namespace from
        ``self.meta.get_ns_string()`` is built, serialized and parsed again.

        :return etree.ElementTree: An lxml ElementTree with proper namespace
        """
        if hasattr(self.infile, 'seek'):
            self.infile.seek(0)

        parsed = lxml.parse(self.infile)
        if self.meta.namespaces:
            return parsed

        log.debug('Adding namespaces to xml for validation')
        old_root = parsed.getroot()
        docinfo = parsed.docinfo
        new_root = etree.Element(
            docinfo.root_name,
            old_root.attrib,
            nsmap={None: self.meta.get_ns_string()}
        )
        # Move every child of the original root under the namespaced root
        new_root[:] = old_root[:]

        # Roundtrip to add namespace
        serialized = lxml.tostring(
            new_root,
            encoding=parsed.docinfo.encoding,
            xml_declaration=True,
            pretty_print=True
        )
        return etree.ElementTree(lxml.fromstring(serialized))

Example #4

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testXInclude(self):
		"""The xi:include element must still be present, i.e. not expanded."""
		XINCLUDE = "{http://www.w3.org/2001/XInclude}"
		tree = _LXML.parse('../../xml_files_windows/xinclude.xml')
		root = tree.getroot()
		# assertEqual replaces the deprecated assertEquals alias
		self.assertEqual("data", root.tag)
		child = root[0]
		self.assertEqual(XINCLUDE + "include", child.tag)

Example #5

0

Show file

File: serializers.py Project: dszafranek/commcare-hq

 def case_to_etree(self, case):
     '''
     Return the root element obtained by serializing `case` with
     `case.to_xml('2.0')` and parsing the result again.

     Encapsulates the version passed to `CommCareCase.to_xml` and
     the temporary hack of re-parsing it. TODO: expose a direct etree
     encoding in casexml?
     '''
     serialized_case = case.to_xml('2.0')
     document = lxml.parse(BytesIO(serialized_case))
     return document.getroot()

Example #6

0

Show file

File: util.py Project: ecederstrand/exchangelib

 def parse_bytes(self, xml_bytes):
     """Parse ``xml_bytes`` and scrub identifying values from the tree.

     Every element attribute named ``RootItemId``, ``ItemId``, ``Id``,
     ``RootItemChangeKey`` or ``ChangeKey`` is set to ``'DEADBEEF='``, and
     every occurrence of a string from ``self.forbidden_strings`` in an
     element's text is replaced with ``'[REMOVED]'``.

     :param xml_bytes: the XML document as bytes
     :return: the parsed, scrubbed tree
     """
     root = parse(io.BytesIO(xml_bytes))
     for elem in root.iter():
         for attr in set(elem.keys()) & {'RootItemId', 'ItemId', 'Id', 'RootItemChangeKey', 'ChangeKey'}:
             elem.set(attr, 'DEADBEEF=')
         # Elements without text have nothing to scrub
         if elem.text is None:
             continue
         for s in self.forbidden_strings:
             # str.replace returns a new string, so it must be assigned back
             elem.text = elem.text.replace(s, '[REMOVED]')
     return root

Example #7

0

Show file

File: document.py Project: kevinlondon/grab

    def _build_dom(self, content, mode):
        """Parse ``content`` and return the root element of the document.

        ``mode`` must be ``'html'`` or ``'xml'``. HTML content is wrapped in
        ``StringIO`` and parsed with an ``HTMLParser``; XML content is wrapped
        in ``BytesIO`` and parsed with an ``XMLParser``. The parser is created
        on first use and kept as an attribute of ``THREAD_STORAGE``.
        """
        from lxml.html import HTMLParser
        from lxml.etree import XMLParser
        from defusedxml.lxml import parse

        assert mode in ('html', 'xml')
        if mode == 'html':
            slot, make_parser, wrap = 'html_parser', HTMLParser, StringIO
        else:
            slot, make_parser, wrap = 'xml_parser', XMLParser, BytesIO

        # Create the parser once and reuse it on later calls
        if not hasattr(THREAD_STORAGE, slot):
            setattr(THREAD_STORAGE, slot, make_parser())
        document = parse(wrap(content),
                         parser=getattr(THREAD_STORAGE, slot))
        return document.getroot()

Example #8

0

Show file

File: iati.py Project: OpenDataServices/cove

def validate_against_schema(schema_path, tree):
    """Validate ``tree`` against the XML schema stored at ``schema_path``.

    Returns a ``(errors_all, invalid_data)`` tuple: the schema error log
    passed through ``lxml_errors_generator`` and ``format_lxml_errors``, and
    a boolean that is True when the error log is non-empty.
    """
    with open(schema_path) as schema_file:
        parsed_schema = etree.parse(schema_file)

    validator = lxml.etree.XMLSchema(parsed_schema)
    validator.validate(tree)
    formatted_errors = format_lxml_errors(
        lxml_errors_generator(validator.error_log))
    has_errors = bool(validator.error_log)
    return formatted_errors, has_errors

Example #9

0

Show file

File: models.py Project: titusz/onixcheck

    def from_file(cls, infile):
        """
        Construct OnixMeta from an infile.

        The infile is parsed and the resulting tree is handed to
        ``cls.from_tree``.

        :param infile: File or Path to file
        :type infile: file or str
        :return OnixMeta: Initialized OnixMeta instance
        """
        parsed = lxml.parse(infile)
        instance = cls.from_tree(parsed)
        return instance

Example #10

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testURLInvocation_noNamespaceSchemaLocation(self):
		"""Parsing the noNamespaceSchemaLocation sample must leave the request counter at "0"."""
		# Reset the server back to "0"
		requests.get("http://127.0.0.1:5000/reset")
		counter_url = "http://127.0.0.1:5000/getCounter"
		count_before = requests.get(counter_url).text.replace("\r\n","")
		self.assertEqual("0", count_before)

		_LXML.parse('../../xml_files_windows/ssrf/url_invocation_noNamespaceSchemaLocation.xml')

		# Check if a request has been made
		count_after = requests.get(counter_url).text.replace("\r\n","")
		self.assertEqual("0", count_after)

Example #11

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testURLInvocation_parameterEntity(self):
		"""The parameter-entity sample must raise EntitiesForbidden and leave the request counter at "0"."""
		# Reset the server back to "0"
		requests.get("http://127.0.0.1:5000/reset")
		counter_url = "http://127.0.0.1:5000/getCounter"
		count_before = requests.get(counter_url).text.replace("\r\n","")
		self.assertEqual("0", count_before)

		with self.assertRaises(EntitiesForbidden):
			_LXML.parse('../../xml_files_windows/ssrf/url_invocation_parameterEntity.xml')

		# Check if a request has been made
		count_after = requests.get(counter_url).text.replace("\r\n","")
		self.assertEqual("0", count_after)

Example #12

0

Show file

File: tests.py Project: OpenDataServices/cove

def test_ruleset_error_exceptions_handling(validated_data):
    """Check how ``iati.get_iati_ruleset_errors`` reports or propagates failures.

    Valid data must not produce the ``return_on_error`` value. Data that
    breaks the ruleset checks must produce it when ``ignore_errors`` is True
    and raise ``AttributeError`` when ``ignore_errors`` is False.
    """
    return_on_error = [{'message': 'There was a problem running ruleset checks', 'exception': True}]
    fixtures_dir = os.path.join('cove_iati', 'fixtures')

    def new_ruleset_dir():
        # Each call uses its own randomly named directory under 'media'
        return os.path.join(os.path.join('media', str(uuid.uuid4())), 'ruleset')

    with open(os.path.join(fixtures_dir, 'basic_iati_unordered_valid.xml')) as fixture:
        valid_tree = etree.parse(fixture)
    outcome = iati.get_iati_ruleset_errors(
        valid_tree,
        new_ruleset_dir(),
        ignore_errors=False,
        return_on_error=return_on_error
    )
    assert outcome != return_on_error

    with open(os.path.join(fixtures_dir, 'basic_iati_ruleset_errors.xml')) as fixture:
        broken_tree = etree.parse(fixture)
    # NOTE(review): the parsed fixture is replaced right away by INVALID_DATA;
    # confirm whether the fixture parse above is still needed.
    broken_tree = etree.fromstring(INVALID_DATA)
    outcome = iati.get_iati_ruleset_errors(
        broken_tree,  # Causes an exception in ruleset checks
        new_ruleset_dir(),
        ignore_errors=True,  # Exception ignored
        return_on_error=return_on_error
    )
    assert outcome == return_on_error

    with pytest.raises(AttributeError):
        iati.get_iati_ruleset_errors(
            broken_tree,  # Causes an exception in ruleset checks
            new_ruleset_dir(),
            ignore_errors=False,  # Exception not ignored
            return_on_error=return_on_error
        )

Example #13

0

Show file

File: workflows.py Project: petro-rudenko/hue

def import_workflow_root(workflow, workflow_definition_root, metadata=None, fs=None):
  """
  Populate ``workflow`` from an already parsed workflow definition root.

  The definition's namespace must be one of OOZIE_NAMESPACES. The definition is
  transformed with workflow.xslt, the resulting nodes and links are saved, and
  the workflow's schema_version and name are updated and saved.

  If anything fails, the workflow is deleted (skip_trash=True) and the original
  exception is re-raised. Raises RuntimeError for an unknown namespace.
  """
  try:
    xslt_path = "%(xslt_dir)s/workflow.xslt" % {
      'xslt_dir': os.path.join(DEFINITION_XSLT_DIR.get(), 'workflows')
    }

    # The context manager closes the stylesheet even when the namespace check
    # or the parse below raises
    with open(xslt_path) as xslt_definition_fh:
      tag = etree.QName(workflow_definition_root.tag)
      schema_version = tag.namespace

      # Ensure namespace exists
      if schema_version not in OOZIE_NAMESPACES:
        raise RuntimeError(_("Tag with namespace %(namespace)s is not valid. Please use one of the following namespaces: %(namespaces)s") % {
          'namespace': workflow_definition_root.tag,
          'namespaces': ', '.join(OOZIE_NAMESPACES)
        })

      # Get XSLT
      xslt = parse(xslt_definition_fh)
    transform = etree.XSLT(xslt)

    # Transform XML using XSLT
    transformed_root = transform(workflow_definition_root)

    # Resolve workflow dependencies and node types and link dependencies
    nodes = _prepare_nodes(workflow, transformed_root)
    _preprocess_nodes(workflow, transformed_root, workflow_definition_root, nodes, fs)
    _save_nodes(workflow, nodes)
    _save_links(workflow, workflow_definition_root)
    _assign_workflow_properties(workflow, workflow_definition_root, schema_version)
    if metadata:
      _process_metadata(workflow, metadata)

    # Update workflow attributes
    workflow.schema_version = schema_version
    workflow.name = workflow_definition_root.get('name')
    workflow.save()
  except:
    # Deliberately catches everything: clean up the workflow, then re-raise
    workflow.delete(skip_trash=True)
    raise

Example #14

0

Show file

File: import_workflow.py Project: OpenPOWER-BigData/HDP-hue

def import_workflow(workflow, workflow_definition, fs=None):
  """
  Populate ``workflow`` from a workflow definition given as an XML string.

  The definition is parsed, its namespace is checked against OOZIE_NAMESPACES,
  it is transformed with workflow.xslt, and the resulting nodes and links are
  saved. Finally the workflow's schema_version is updated and saved.

  Raises RuntimeError if the definition parses to None or if its namespace is
  not one of OOZIE_NAMESPACES.
  """
  xslt_path = "%(xslt_dir)s/workflow.xslt" % {
    'xslt_dir': DEFINITION_XSLT_DIR.get()
  }

  # The context manager closes the stylesheet even when the definition is
  # rejected or fails to parse
  with open(xslt_path) as xslt_definition_fh:
    # Parse Workflow Definition
    workflow_definition_root = fromstring(workflow_definition)

    if workflow_definition_root is None:
      raise RuntimeError(_("Could not find any nodes in Workflow definition. Maybe it's malformed?"))

    ns = workflow_definition_root.tag[:-12] # Remove workflow-app from tag in order to get proper namespace prefix
    # Strip the surrounding braces; an empty result means no namespace
    schema_version = ns[1:-1] or None

    # Ensure namespace exists
    if schema_version not in OOZIE_NAMESPACES:
      raise RuntimeError(_("Tag with namespace %(namespace)s is not valid. Please use one of the following namespaces: %(namespaces)s") % {
        'namespace': workflow_definition_root.tag,
        'namespaces': ', '.join(OOZIE_NAMESPACES)
      })

    # Get XSLT
    xslt = parse(xslt_definition_fh)
  transform = etree.XSLT(xslt)

  # Transform XML using XSLT
  transformed_root = transform(workflow_definition_root)

  # Resolve workflow dependencies and node types and link dependencies
  nodes = _prepare_nodes(workflow, transformed_root)
  _preprocess_nodes(workflow, transformed_root, workflow_definition_root, nodes, fs)
  _save_nodes(workflow, nodes)
  _save_links(workflow, workflow_definition_root)

  # Update schema_version
  workflow.schema_version = schema_version
  workflow.save()

Example #15

0

Show file

File: workflows.py Project: petro-rudenko/hue

def _resolve_subworkflow_from_deployment_dir(fs, workflow, app_path):
  """
  Resolves subworkflow in a subworkflow node.

  Opens ``<app_path>/workflow.xml`` on ``fs`` (after stripping a trailing '/'
  and a leading 'hdfs://'), parses it and returns the managed Workflow owned by
  ``workflow.owner`` whose name matches the parsed ``name`` attribute.

  Returns None if an IOError occurs while opening or parsing the file.
  Raises RuntimeError if ``fs`` is falsy or the workflow name cannot be read.
  """
  if not fs:
    raise RuntimeError(_("No hadoop file system to operate on."))

  if app_path.endswith('/'):
    app_path = app_path[:-1]
  if app_path.startswith('hdfs://'):
    app_path = app_path[7:]

  try:
    f = fs.open('%s/workflow.xml' % app_path)
    try:
      root = parse(f)
    finally:
      # Release the handle even when parsing fails
      f.close()
    # NOTE(review): if parse() returns a tree rather than an element, .attrib
    # raises AttributeError and ends up as the RuntimeError below - confirm.
    return Workflow.objects.get(name=root.attrib['name'], owner=workflow.owner, managed=True)
  except IOError:
    # An unreadable deployment dir means the subworkflow cannot be resolved
    return None
  except (KeyError, AttributeError):
    raise RuntimeError(_("Could not find workflow name when resolving subworkflow."))

Example #16

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testDOS_entitySize(self):
		"""Parsing dos_entitySize.xml must raise EntitiesForbidden."""
		sample = '../../xml_files_windows/dos/dos_entitySize.xml'
		#with self.assertRaises(XMLSyntaxError):
		with self.assertRaises(EntitiesForbidden):
			_LXML.parse(sample)

Example #17

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testDOS_indirections(self):
		"""Parsing dos_indirections.xml must raise XMLSyntaxError."""
		sample = '../../xml_files_windows/dos/dos_indirections.xml'
		with self.assertRaises(XMLSyntaxError):
			_LXML.parse(sample)

Example #18

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testDOS_core(self):
		"""Parsing dos_core.xml must raise EntitiesForbidden."""
		sample = '../../xml_files_windows/dos/dos_core.xml'
		with self.assertRaises(EntitiesForbidden):
			_LXML.parse(sample)

Example #19

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testDefault_noAttack(self):
		"""The standard sample parses and its root text contains "4"."""
		root = _LXML.parse('../../xml_files_windows/standard.xml').getroot()
		self.assertIn("4", root.text)

Example #20

0

Show file

File: xml_document_emitter.py Project: hakerlol/puthon_head_first

 def _load_tree(self, addon_path):
     """Yield one ``XMLDocument`` when ``addon_path.path`` has an ``.xml`` suffix.

     The suffix check is case-insensitive. For any other suffix nothing is
     yielded. The file is read in binary mode and closed before the
     document is yielded.
     """
     if addon_path.path.suffix.lower() != ".xml":
         return
     with addon_path.path.open(mode="rb") as stream:
         document = lxml.parse(stream)
         root_node = document.getroot()
     yield XMLDocument(addon_path.addon, addon_path.path, root_node)

Example #21

0

Show file

File: util.py Project: caroid/exchangelib-1

 def parse_bytes(xml_bytes):
     """Parse XML given as bytes by wrapping it in an in-memory binary stream."""
     stream = io.BytesIO(xml_bytes)
     return parse(stream)

Example #22

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testParameterEntity_core(self):
		"""Parsing parameterEntity_core.xml must raise EntitiesForbidden."""
		sample = '../../xml_files_windows/xxep/parameterEntity_core.xml'
		with self.assertRaises(EntitiesForbidden):
			_LXML.parse(sample)

Example #23

0

Show file

File: xml_utils.py Project: srekal/odd

def get_root(xml_filename: pathlib.Path):
    """Return the root element of the XML document stored at ``xml_filename``.

    The file is opened in binary mode and closed once parsing is done.
    """
    with xml_filename.open(mode="rb") as stream:
        document = lxml.parse(stream)
        return document.getroot()

Example #24

0

Show file

File: iati.py Project: patxiworks/cove

def common_checks_context_iati(context,
                               upload_dir,
                               data_file,
                               file_type,
                               api=False):
    """Run schema and ruleset checks on ``data_file`` and record them in ``context``.

    The data file is validated against the activity schema of ``SchemaIATI``
    and checked with ``get_ruleset_errors``. Validation errors are loaded from
    ``validation_errors-2.json`` in ``upload_dir`` when that file exists;
    otherwise they are computed and, unless ``api`` is true, written there.

    ``context`` is updated in place and returned. When ``api`` is false it also
    receives the error counts, the cell source map and ``first_render``.

    Raises ``CoveInputDataError`` when the data file is not well formed XML.
    """
    iati_schema = SchemaIATI()
    cell_source_map = {}
    errors_cache_path = os.path.join(upload_dir, 'validation_errors-2.json')

    with open(data_file) as fp, open(iati_schema.activity_schema) as schema_fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _('We think you tried to upload a XML file, but it is not well formed XML.'
                         '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                         '</span> <strong>Error message:</strong> {}'.format(err)),
                'error': format(err)
            })
        validator = lxml.etree.XMLSchema(etree.parse(schema_fp))
        validator.validate(tree)
        lxml_errors = lxml_errors_generator(validator.error_log)
        ruleset_errors = get_ruleset_errors(
            tree, os.path.join(upload_dir, 'ruleset'))

    errors_all = format_lxml_errors(lxml_errors)

    # Only non-XML uploads come with a cell source map on disk
    if file_type != 'xml':
        source_map_path = os.path.join(upload_dir, 'cell_source_map.json')
        with open(source_map_path) as source_map_fp:
            cell_source_map = json.load(source_map_fp)

    if os.path.exists(errors_cache_path):
        # Reuse the errors stored by an earlier run
        with open(errors_cache_path) as cached_fp:
            validation_errors = json.load(cached_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type,
                                                      cell_source_map)
        if not api:
            with open(errors_cache_path, 'w+') as cached_fp:
                cached_fp.write(json.dumps(validation_errors))

    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors,
    })
    if api:
        return context

    error_total = sum(len(value) for value in validation_errors.values())
    context.update({
        'validation_errors_count': error_total,
        'ruleset_errors_count': len(ruleset_errors),
        'cell_source_map': cell_source_map,
        'first_render': False
    })
    return context

Example #25

0

Show file

    def load_thesaurus(self, input_file, name, store):
        """Load a SKOS thesaurus from an RDF/XML file.

        Reads the ``skos:ConceptScheme`` (titles, description, issue date,
        ``rdf:about``) and every ``skos:Concept`` with its labels, printing a
        summary line for each. Model instances are saved only when ``store``
        is truthy.

        :param input_file: file to parse
        :param name: value stored as the thesaurus identifier
        :param store: when falsy, nothing is saved
        :raises CommandError: if the file has no ``skos:ConceptScheme`` or the
            scheme has no ``dcterms:issued`` element
        """

        RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        XML_URI = 'http://www.w3.org/XML/1998/namespace'

        ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
        LANG_ATTRIB = f"{{{XML_URI}}}lang"

        ns = {
            'rdf': RDF_URI,
            'foaf': 'http://xmlns.com/foaf/0.1/',
            'dc': 'http://purl.org/dc/elements/1.1/',
            'dcterms': 'http://purl.org/dc/terms/',
            'skos': 'http://www.w3.org/2004/02/skos/core#'
        }

        tfile = dlxml.parse(input_file)
        root = tfile.getroot()

        scheme = root.find('skos:ConceptScheme', ns)
        # An element's truth value reflects whether it has children, so a
        # missing element must be detected by comparing with None.
        if scheme is None:
            raise CommandError("ConceptScheme not found in file")

        titles = scheme.findall('dc:title', ns)

        default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
        available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
        thesaurus_title = determinate_value(available_lang, default_lang)

        # Fall back to the title only when there is no description element
        description = scheme.find('dc:description', ns)
        descr = description.text if description is not None else thesaurus_title

        issued = scheme.find('dcterms:issued', ns)
        if issued is None:
            raise CommandError("dcterms:issued not found in ConceptScheme")
        date_issued = issued.text
        about = scheme.attrib.get(ABOUT_ATTRIB)

        print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')

        thesaurus = Thesaurus()
        thesaurus.identifier = name

        thesaurus.title = thesaurus_title
        thesaurus.description = descr
        thesaurus.about = about
        thesaurus.date = date_issued

        if store:
            thesaurus.save()

        for lang in available_lang:
            if lang[0] is not None:
                thesaurus_label = ThesaurusLabel()
                thesaurus_label.lang = lang[0]
                thesaurus_label.label = lang[1]
                thesaurus_label.thesaurus = thesaurus
                # Honour 'store' like every other save in this method
                if store:
                    thesaurus_label.save()

        for concept in root.findall('skos:Concept', ns):
            about = concept.attrib.get(ABOUT_ATTRIB)
            alt_label = concept.find('skos:altLabel', ns)
            if alt_label is not None:
                alt_label = alt_label.text
            else:
                # No altLabel: derive one from the concept's prefLabels
                concepts = concept.findall('skos:prefLabel', ns)
                available_lang = get_all_lang_available_with_title(
                    concepts, LANG_ATTRIB)
                alt_label = determinate_value(available_lang, default_lang)

            print(f'Concept {alt_label} ({about})')

            tk = ThesaurusKeyword()
            tk.thesaurus = thesaurus
            tk.about = about
            tk.alt_label = alt_label

            if store:
                tk.save()

            for pref_label in concept.findall('skos:prefLabel', ns):
                lang = pref_label.attrib.get(LANG_ATTRIB)
                label = pref_label.text

                print(f'    Label {lang}: {label}')

                tkl = ThesaurusKeywordLabel()
                tkl.keyword = tk
                tkl.lang = lang
                tkl.label = label

                if store:
                    tkl.save()

Example #26

0

Show file

File: xpath.py Project: olivierh59500/python-libweb

 def build_tree(self, content):  # pylint: disable=no-self-use
     """Uses defusedxml to parse the response into ElementTree.

     ``content`` is wrapped in an in-memory binary stream before parsing.
     """
     stream = BytesIO(content)
     return parse(stream)

Example #27

0

Show file

File: xpath.py Project: maheshakulaa/SplunkGit

            exit(0)
        path = keywords[0]
        # Support for searching with absolute path
        if len(path) > 1 and path[0] == '/' and path[1] != '/':
            path = '/data' + path
        results, dummyresults, settings = si.getOrganizedResults()
        # for each results
        for result in results:
            # get field value
            myxml = result.get(field, None)
            added = False
            if myxml != None:
                # make event value valid xml
                myxml = "<data>%s</data>" % myxml
                try:
                    et = safe_lxml.parse(StringIO(myxml))
                    nodes = et.xpath(path)
                    values = [tostr(node) for node in nodes]
                    result[outfield] = values
                    added = True
                except Exception as e:
                    pass  # consider throwing exception and explain path problem

            if not added and defaultval != None:
                result[outfield] = defaultval

        si.outputResults(results)
    except Exception as e:
        import traceback
        stack = traceback.format_exc()
        si.generateErrorResults("Error '%s'. %s" % (e, stack))

Example #28

0

Show file

File: base.py Project: victorlin/depot

 def from_file(cls, filename=None, fileobj=None, *args, **kwargs):
     """Build an instance from an XML file.

     ``fileobj`` is parsed when given (truthy); otherwise ``filename`` is
     opened in binary mode. The parsed document is passed to
     ``cls.from_element`` as the ``root`` keyword and ``filename`` as the
     ``filename`` keyword, together with any extra arguments.
     """
     opened_here = not fileobj
     if opened_here:
         fileobj = open(filename, "rb")
     try:
         kwargs["filename"] = filename
         kwargs["root"] = lxml.parse(fileobj)
     finally:
         # Close only what this method opened; a caller-supplied object
         # stays under the caller's control.
         if opened_here:
             fileobj.close()
     return cls.from_element(*args, **kwargs)

Example #29

0

Show file

File: util.py Project: ecederstrand/exchangelib

 def parse_bytes(xml_bytes):
     """Parse XML given as bytes by wrapping it in an in-memory binary stream."""
     stream = io.BytesIO(xml_bytes)
     return parse(stream)

Example #30

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testXXE(self):
		"""Parsing xxe.xml must raise EntitiesForbidden."""
		sample = '../../xml_files_windows/xxe/xxe.xml'
		with self.assertRaises(EntitiesForbidden):
			_LXML.parse(sample)

Example #31

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testInternalSubset_PEReferenceInDTD(self):
		"""Parsing internalSubset_PEReferenceInDTD.xml must raise EntitiesForbidden."""
		sample = '../../xml_files_windows/xxep/internalSubset_PEReferenceInDTD.xml'
		with self.assertRaises(EntitiesForbidden):
			_LXML.parse(sample)

Example #32

0

Show file

import defusedxml.lxml as _LXML

# Parse the standard sample and show the root element's tag and text.
tree = _LXML.parse("../../xml_files_windows/standard.xml")
root = tree.getroot()
# Single-argument print(...) runs on both Python 2 and Python 3
print(root.tag)
print(root.text)

Example #33

0

Show file

File: testDefusedLxml.py Project: RUB-NDS/DTD-Attacks

	def testParameterEntity_doctype(self):
		"""The parameterEntity_doctype sample parses and its root has no text."""
		tree = _LXML.parse('../../xml_files_windows/xxep/parameterEntity_doctype.xml')
		root = tree.getroot()
		# assertIsNone replaces the deprecated assertEquals(None, ...) form
		self.assertIsNone(root.text)

Example #34

0

Show file

    def do_parse_format(self, fmt_path, fmt_file, pixbuf=False, indexed=False):
        """
        Parse the format file for the RAMSTKTreeView().

        The format file is parsed once and every column property is read from
        that single tree. The resulting lists are stored on the instance and
        sorted by the column positions found in the file.

        :param str fmt_path: the base XML path in the format file to read.
        :param str fmt_file: the absolute path to the format file to read.
        :keyword bool pixbuf: indicates whether or not to prepend a PixBuf
                              column to the gtk.TreeModel().
        :keyword bool indexed: indicates whether or not to append a column to
                               the gtk.TreeModel() to hold indexing
                               information.
        :return: None
        :rtype: None
        """
        # Parse the format file once; all queries below share this tree.
        _tree = lxml.parse(fmt_file)

        # Retrieve the column heading text from the format file.
        self.headings = _tree.xpath(fmt_path + "/usertitle")

        # Retrieve the column datatype from the format file.
        self.datatypes = _tree.xpath(fmt_path + "/datatype")

        # Retrieve the column position from the format file.
        _position = _tree.xpath(fmt_path + "/position")

        # Retrieve the cell renderer type from the format file.
        self.widgets = _tree.xpath(fmt_path + "/widget")

        # Retrieve whether or not the column is editable from the format file.
        self.editable = _tree.xpath(fmt_path + "/editable")

        # Retrieve whether or not the column is visible from the format file.
        self.visible = _tree.xpath(fmt_path + "/visible")

        # Retrieve the column keys from the format file.
        _keys = _tree.xpath(fmt_path + "/key")

        # Replace each XML element with its converted text value.
        for i in range(len(self.datatypes)):  # pylint: disable=C0200
            self.datatypes[i] = self.datatypes[i].text
            self.editable[i] = int(self.editable[i].text)
            self.headings[i] = self.headings[i].text.replace("  ", "\n")
            self.order.append(int(_position[i].text))
            self.visible[i] = int(self.visible[i].text)
            self.widgets[i] = self.widgets[i].text
            _position[i] = int(_position[i].text)
            # Not all format files will have keys.
            try:
                _keys[i] = _keys[i].text
            except IndexError:
                pass

        # Append entries to each list if this RAMSTKTreeView is to display an
        # icon at the beginning of the row (Usage Profile, Hardware, etc.)
        if pixbuf:
            self.datatypes.append('pixbuf')
            self.editable.append(0)
            self.headings.append('')
            self.order.append(len(self.order))
            self.pixbuf_col = len(self.datatypes) - 1
            self.visible.append(1)
            self.widgets.append('pixbuf')

        # We may want to add a column to hold indexing information for program
        # control.  This is used, for example, by aggregate data views to hold
        # the Node ID from the PyPubSub Tree().
        if indexed:
            self.datatypes.append('gchararray')
            self.editable.append(0)
            self.headings.append('')
            self.order.append(len(self.order))
            self.visible.append(0)
            self.widgets.append('text')
            self.index_col = len(self.datatypes) - 1

        # Sort each of the lists according to the desired sequence provided in
        # the _position list.  This is necessary to allow for user-specific
        # ordering of columns in the RAMSTKTreeView.
        self.datatypes = [
            x for _, x in sorted(zip(self.order, self.datatypes))
        ]
        self.editable = [x for _, x in sorted(zip(self.order, self.editable))]
        self.headings = [x for _, x in sorted(zip(self.order, self.headings))]
        self.korder = [x for _, x in sorted(zip(_position, _keys))]
        self.visible = [x for _, x in sorted(zip(self.order, self.visible))]
        self.widgets = [x for _, x in sorted(zip(self.order, self.widgets))]

        return None

Example #35

0

Show file

 def from_file(cls, path):
     """Alternate constructor: parse ``path`` and pass the result to ``cls``."""
     parsed = parse(path)
     return cls(parsed)

Example #36

0

Show file

def _xml_upload_error_context(problem, err):
    '''Build the ``CoveInputDataError`` context for an unparseable XML upload.

    ``problem`` is the sentence shown to the user describing what went wrong;
    ``err`` is the exception raised while parsing. The exception is
    interpolated into the message through ``format_html`` and also exposed
    as plain text under the ``error`` key.
    '''
    return {
        'sub_title': _("Sorry, we can't process that data"),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(
            format_html(
                problem +
                '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                '</span> <strong>Error message:</strong> {}',
                err)),
        'error': format(err)
    }


def _parse_iati_xml(data_file):
    '''Parse ``data_file`` as XML and return the resulting tree.

    Raises ``CoveInputDataError`` (with a user-facing context) when the file
    is not well formed XML or cannot be decoded.
    '''
    with open(data_file, 'rb') as fp:
        try:
            return etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(context=_xml_upload_error_context(
                'We think you tried to upload a XML file, but it is not well formed XML.',
                err))
        except UnicodeDecodeError as err:
            raise CoveInputDataError(context=_xml_upload_error_context(
                'We think you tried to upload a XML file, but the encoding is incorrect.',
                err))


def _count_ruleset_errors(ruleset_errors):
    '''Return the total number of errors in a two-level ruleset error dict.

    Anything that is not a dict (``None`` when rulesets are disabled, or the
    list returned on failure) counts as zero.
    '''
    if not isinstance(ruleset_errors, dict):
        return 0
    return sum(
        len(errors)
        for rules in ruleset_errors.values()
        for errors in rules.values())


def common_checks_context_iati(context,
                               upload_dir,
                               data_file,
                               file_type,
                               api=False,
                               openag=False,
                               orgids=False):
    '''Run schema and ruleset checks on an IATI XML file and fill ``context``.

    The file at ``data_file`` is parsed, validated against the organisation
    or activity schema (chosen from the root tag), and checked against the
    IATI rulesets plus, when ``openag`` / ``orgids`` are set, the matching
    extra rulesets. Results are stored in ``context``, which is also
    returned.

    When ``api`` is false the validation errors are cached as JSON in
    ``upload_dir`` and extra display-oriented keys (counts, cell source map,
    schema name, per-activity ruleset errors) are added to ``context``.

    Raises ``CoveInputDataError`` if the file cannot be parsed as XML.

    TODO: this function is still trying to do too many things. Separate more
    of its logic into smaller functions doing one single thing each.
    '''
    schema_iati = SchemaIATI()
    cell_source_map = {}
    validation_errors_path = os.path.join(upload_dir,
                                          'validation_errors-3.json')

    tree = _parse_iati_xml(data_file)

    if tree.getroot().tag == 'iati-organisations':
        schema_path = schema_iati.organisation_schema
        schema_name = 'Organisation'
        # rulesets don't support organisation files properly yet
        # so disable rather than give partial information
        ruleset_disabled = True
    else:
        schema_path = schema_iati.activity_schema
        schema_name = 'Activity'
        ruleset_disabled = False
    errors_all, invalid_data = validate_against_schema(schema_path, tree)

    # Shared fallback value handed to every ruleset runner.
    return_on_error = [{
        'message': 'There was a problem running ruleset checks',
        'exception': True
    }]

    # Validation errors
    if file_type != 'xml':
        with open(os.path.join(upload_dir,
                               'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)
    if os.path.exists(validation_errors_path):
        # Reuse the result cached by an earlier run.
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type,
                                                      cell_source_map)
        if not api:
            with open(validation_errors_path, 'w+') as validation_error_fp:
                validation_error_fp.write(json.dumps(validation_errors))

    # Ruleset errors
    if ruleset_disabled:
        ruleset_errors = None
    else:
        ruleset_errors = get_iati_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset'),
            api=api,
            ignore_errors=invalid_data,
            return_on_error=return_on_error)

    if openag:
        ruleset_errors_ag = get_openag_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_openang'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_openag': ruleset_errors_ag})
    if orgids:
        ruleset_errors_orgids = get_orgids_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_orgids'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_orgids': ruleset_errors_orgids})

    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors
    })

    if not api:
        context.update({
            'validation_errors_count':
            sum(len(value) for value in validation_errors.values()),
            'cell_source_map':
            cell_source_map,
            'first_render':
            False,
            'schema_name':
            schema_name,
            'ruleset_disabled':
            ruleset_disabled
        })
        if ruleset_errors:
            ruleset_errors_by_activity = get_iati_ruleset_errors(
                tree,
                os.path.join(upload_dir, 'ruleset'),
                group_by='activity',
                ignore_errors=invalid_data,
                return_on_error=return_on_error)
            context['ruleset_errors'] = [
                ruleset_errors, ruleset_errors_by_activity
            ]

        # Counted on the ungrouped result, not on the list stored above.
        context['ruleset_errors_count'] = _count_ruleset_errors(ruleset_errors)
    return context

Example #37

0

Show file

File: load_thesaurus.py Project: GII-ENSDI/geoportal

    def load_thesaurus(self, input_file, name, store):
        """Read a SKOS thesaurus from an RDF/XML file and build model objects.

        ``input_file`` is parsed with ``dlxml.parse``. The file's
        ``skos:ConceptScheme`` supplies the thesaurus title, description and
        issue date; every ``skos:Concept`` becomes a ``ThesaurusKeyword`` and
        each of its ``skos:prefLabel`` children a ``ThesaurusKeywordLabel``.

        ``name`` is stored as the thesaurus identifier. Objects are only
        saved when ``store`` is true; otherwise the file is just parsed and
        reported on stdout.

        Raises ``CommandError`` when the concept scheme, or one of the
        elements this loader reads text from, is missing from the file.
        """

        RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        XML_URI = 'http://www.w3.org/XML/1998/namespace'

        ABOUT_ATTRIB = '{' + RDF_URI + '}about'
        LANG_ATTRIB = '{' + XML_URI + '}lang'

        ns = {
            'rdf': RDF_URI,
            'foaf': 'http://xmlns.com/foaf/0.1/',
            'dc': 'http://purl.org/dc/elements/1.1/',
            'dcterms': 'http://purl.org/dc/terms/',
            'skos': 'http://www.w3.org/2004/02/skos/core#'
        }

        def required_text(parent, path):
            # Fail with a message naming the missing element instead of an
            # AttributeError on None.
            node = parent.find(path, ns)
            if node is None:
                raise CommandError(
                    'Element "{}" not found in file'.format(path))
            return node.text

        tfile = dlxml.parse(input_file)
        root = tfile.getroot()

        scheme = root.find('skos:ConceptScheme', ns)
        # An Element is falsy when it has no children, so compare against
        # None explicitly to detect "not found".
        if scheme is None:
            raise CommandError("ConceptScheme not found in file")

        title = required_text(scheme, 'dc:title')
        descr = required_text(scheme, 'dc:description')
        date_issued = required_text(scheme, 'dcterms:issued')

        # Single-argument print() with a unicode format string behaves the
        # same on Python 2 and 3 and copes with non-ASCII text.
        print(u'Thesaurus "{}" issued on {}'.format(title, date_issued))

        thesaurus = Thesaurus()
        thesaurus.identifier = name

        thesaurus.title = title
        thesaurus.description = descr
        thesaurus.date = date_issued

        if store:
            thesaurus.save()

        for concept in root.findall('skos:Concept', ns):
            about = concept.attrib.get(ABOUT_ATTRIB)
            alt_label = required_text(concept, 'skos:altLabel')

            print(u'Concept {} ({})'.format(alt_label, about))

            tk = ThesaurusKeyword()
            tk.thesaurus = thesaurus
            tk.about = about
            tk.alt_label = alt_label

            if store:
                tk.save()

            for pref_label in concept.findall('skos:prefLabel', ns):
                lang = pref_label.attrib.get(LANG_ATTRIB)
                label = pref_label.text

                print(u'    Label {}: {}'.format(lang, label))

                tkl = ThesaurusKeywordLabel()
                tkl.keyword = tk
                tkl.lang = lang
                tkl.label = label

                if store:
                    tkl.save()

Example #38

0

Show file

File: base.py Project: jportasa/depot-original

 def from_file(cls, filename=None, fileobj=None, *args, **kwargs):
     """Build an instance from an XML file given by name or as a file object.

     ``fileobj`` is used when supplied; otherwise ``filename`` is opened in
     binary mode. The parsed document is passed to ``cls.from_element`` as
     ``root`` and the original ``filename`` as ``filename``, together with
     any extra positional and keyword arguments.
     """
     kwargs['filename'] = filename
     if fileobj:
         # Caller-owned handle: parse it but leave closing to the caller.
         kwargs['root'] = lxml.parse(fileobj)
     else:
         # We opened this handle ourselves, so close it once parsing is done
         # instead of leaking it.
         with open(filename, 'rb') as opened:
             kwargs['root'] = lxml.parse(opened)
     return cls.from_element(*args, **kwargs)