def get_tree(data_file):
    """Parse *data_file* as XML, converting parse failures into CoveInputDataError.

    :param data_file: path to the uploaded file, opened in binary mode.
    :return: the parsed lxml ElementTree.
    :raises CoveInputDataError: when the file is not well-formed XML or has a
        broken encoding.
    """
    with open(data_file, 'rb') as fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as exc:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _(format_html('We think you tried to upload a XML file, but it is not well formed XML.'
                                     '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                     '</span> <strong>Error message:</strong> {}', exc)),
                'error': format(exc)
            })
        except UnicodeDecodeError as exc:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _(format_html('We think you tried to upload a XML file, but the encoding is incorrect.'
                                     '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                     '</span> <strong>Error message:</strong> {}', exc)),
                'error': format(exc)
            })
    return tree
def to_xml(bytes_content):
    """Convert bytes or a generator of bytes to an XML tree.

    Exchange servers may spit out the weirdest XML. lxml is pretty good at
    recovering from errors, so a forgiving parser is used; the remaining
    failure modes are mapped onto ParseError with as much context as we can
    recover (offending line excerpt when available).

    :param bytes_content: raw response body, either bytes or an iterable of
        byte chunks.
    :raises ParseError: when the content cannot be parsed at all.
    """
    if isinstance(bytes_content, bytes):
        stream = io.BytesIO(bytes_content)
    else:
        stream = BytesGeneratorIO(bytes_content)
    forgiving_parser = _forgiving_parser.getDefaultParser()
    try:
        return parse(stream, parser=forgiving_parser)
    except AssertionError as e:
        raise ParseError(e.args[0], '<not from file>', -1, 0)
    except _etree.ParseError as e:
        # lxml stores the location in .position; mirror it onto the
        # stdlib-style .lineno/.offset attributes used below.
        if hasattr(e, 'position'):
            e.lineno, e.offset = e.position
        if not e.lineno:
            raise ParseError(text_type(e), '<not from file>', e.lineno, e.offset)
        try:
            stream.seek(0)
            offending_line = stream.read().splitlines()[e.lineno - 1]
        except IndexError:
            raise ParseError(text_type(e), '<not from file>', e.lineno, e.offset)
        else:
            # Include a +-20 char excerpt around the offending offset to make
            # the error actionable.
            offending_excerpt = offending_line[max(0, e.offset - 20):e.offset + 20]
            msg = '%s\nOffending text: [...]%s[...]' % (text_type(e), offending_excerpt)
            # BUG FIX: the original omitted the filename argument here, so
            # lineno/offset were shifted into the wrong ParseError positions
            # (every other raise in this function passes four arguments).
            raise ParseError(msg, '<not from file>', e.lineno, e.offset)
    except TypeError:
        stream.seek(0)
        raise ParseError('This is not XML: %r' % stream.read(), '<not from file>', -1, 0)
def xml_tree(self):
    """
    Parse the infile with lxml and add the proper namespace if required.

    :return etree.ElementTree: An lxml ElementTree with proper namespace
    """
    # Rewind seekable inputs in case they have already been read once.
    if hasattr(self.infile, 'seek'):
        self.infile.seek(0)

    tree = lxml.parse(self.infile)
    if self.meta.namespaces:
        return tree

    log.debug('Adding namespaces to xml for validation')
    root = tree.getroot()
    ns_root = etree.Element(
        tree.docinfo.root_name,
        root.attrib,
        nsmap={None: self.meta.get_ns_string()}
    )
    ns_root[:] = root[:]

    # Roundtrip through serialization so the namespace is applied everywhere.
    serialized = lxml.tostring(
        ns_root,
        encoding=tree.docinfo.encoding,
        xml_declaration=True,
        pretty_print=True
    )
    reparsed_root = lxml.fromstring(serialized)
    return etree.ElementTree(reparsed_root)
def testXInclude(self):
    """XInclude directives must survive parsing unexpanded: the child element
    is still the xi:include element itself."""
    XINCLUDE = "{http://www.w3.org/2001/XInclude}"
    tree = _LXML.parse('../../xml_files_windows/xinclude.xml')
    root = tree.getroot()
    # assertEquals is a deprecated alias removed in Python 3.12; use assertEqual.
    self.assertEqual("data", root.tag)
    child = root[0]
    self.assertEqual(XINCLUDE + "include", child.tag)
def case_to_etree(self, case):
    '''
    Serialize *case* with casexml version 2.0 and re-parse it into an lxml
    element.

    Encapsulates the version passed to `CommCareCase.to_xml` and the
    temporary hack of re-parsing it. TODO: expose a direct etree encoding
    in casexml?
    '''
    serialized = case.to_xml('2.0')
    return lxml.parse(BytesIO(serialized)).getroot()
def parse_bytes(self, xml_bytes):
    """Parse *xml_bytes* and scrub sensitive content from the tree.

    Item/change-key attributes are overwritten with a dummy value, and every
    string in ``self.forbidden_strings`` is redacted from element text.

    :param xml_bytes: raw XML document as bytes.
    :return: the scrubbed element tree.
    """
    root = parse(io.BytesIO(xml_bytes))
    for elem in root.iter():
        for attr in set(elem.keys()) & {'RootItemId', 'ItemId', 'Id', 'RootItemChangeKey', 'ChangeKey'}:
            elem.set(attr, 'DEADBEEF=')
        # BUG FIX: str.replace returns a new string; the original discarded
        # the result, so forbidden strings were never actually removed. Also
        # guard against elements with no text (elem.text is None).
        if elem.text:
            for s in self.forbidden_strings:
                elem.text = elem.text.replace(s, '[REMOVED]')
    return root
def _build_dom(self, content, mode):
    """Parse *content* into a DOM root using a per-thread cached parser.

    :param content: document text (str for html, bytes for xml).
    :param mode: either 'html' or 'xml'.
    :return: the root element of the parsed document.
    """
    from lxml.html import HTMLParser
    from lxml.etree import XMLParser
    from defusedxml.lxml import parse

    assert mode in ('html', 'xml')
    if mode == 'html':
        # Lazily create one HTML parser per thread and reuse it.
        if not hasattr(THREAD_STORAGE, 'html_parser'):
            THREAD_STORAGE.html_parser = HTMLParser()
        return parse(StringIO(content), parser=THREAD_STORAGE.html_parser).getroot()
    # XML branch: same per-thread caching for the XML parser.
    if not hasattr(THREAD_STORAGE, 'xml_parser'):
        THREAD_STORAGE.xml_parser = XMLParser()
    return parse(BytesIO(content), parser=THREAD_STORAGE.xml_parser).getroot()
def validate_against_schema(schema_path, tree):
    """Validate *tree* against the XSD at *schema_path*.

    :return: tuple ``(errors_all, invalid_data)`` — the formatted error
        collection and a flag that is True when the schema log is non-empty.
    """
    with open(schema_path) as schema_fp:
        schema = lxml.etree.XMLSchema(etree.parse(schema_fp))
    schema.validate(tree)
    formatted_errors = format_lxml_errors(lxml_errors_generator(schema.error_log))
    return formatted_errors, bool(schema.error_log)
def from_file(cls, infile):
    """
    Construct OnixMeta from an infile.

    :param infile: File or Path to file
    :type infile: file or str
    :return OnixMeta: Initialized OnixMeta instance
    """
    parsed = lxml.parse(infile)
    return cls.from_tree(parsed)
def testURLInvocation_noNamespaceSchemaLocation(self):
    """Parsing must not fetch the noNamespaceSchemaLocation URL (SSRF guard)."""
    # Reset the helper server's request counter back to "0".
    requests.get("http://127.0.0.1:5000/reset")
    url_counter = "http://127.0.0.1:5000/getCounter"
    counter_before = requests.get(url_counter).text.replace("\r\n", "")
    self.assertEqual("0", counter_before)

    tree = _LXML.parse('../../xml_files_windows/ssrf/url_invocation_noNamespaceSchemaLocation.xml')

    # The counter must be unchanged: no outbound request was triggered.
    counter_after = requests.get(url_counter).text.replace("\r\n", "")
    self.assertEqual("0", counter_after)
def testURLInvocation_parameterEntity(self):
    """A parameter-entity SSRF payload must be rejected and trigger no request."""
    # Reset the helper server's request counter back to "0".
    requests.get("http://127.0.0.1:5000/reset")
    url_counter = "http://127.0.0.1:5000/getCounter"
    counter_before = requests.get(url_counter).text.replace("\r\n", "")
    self.assertEqual("0", counter_before)

    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/ssrf/url_invocation_parameterEntity.xml')

    # The counter must be unchanged: no outbound request was triggered.
    counter_after = requests.get(url_counter).text.replace("\r\n", "")
    self.assertEqual("0", counter_after)
def test_ruleset_error_exceptions_handling(validated_data):
    """Exceptions in ruleset checks are swallowed or re-raised per ignore_errors."""
    return_on_error = [{'message': 'There was a problem running ruleset checks', 'exception': True}]

    # A well-formed fixture: ruleset checks run and return real results.
    file_path = os.path.join('cove_iati', 'fixtures', 'basic_iati_unordered_valid.xml')
    with open(file_path) as fp:
        valid_data_tree = etree.parse(fp)
    upload_dir = os.path.join('media', str(uuid.uuid4()))
    ruleset_errors = iati.get_iati_ruleset_errors(
        valid_data_tree,
        os.path.join(upload_dir, 'ruleset'),
        ignore_errors=False,
        return_on_error=return_on_error
    )
    assert ruleset_errors != return_on_error

    file_path = os.path.join('cove_iati', 'fixtures', 'basic_iati_ruleset_errors.xml')
    with open(file_path) as fp:
        invalid_data_tree = etree.parse(fp)
    # Replace the tree with data that makes the ruleset checks blow up.
    invalid_data_tree = etree.fromstring(INVALID_DATA)

    upload_dir = os.path.join('media', str(uuid.uuid4()))
    ruleset_errors = iati.get_iati_ruleset_errors(
        invalid_data_tree,  # Causes an exception in ruleset checks
        os.path.join(upload_dir, 'ruleset'),
        ignore_errors=True,  # Exception ignored
        return_on_error=return_on_error
    )
    assert ruleset_errors == return_on_error

    with pytest.raises(AttributeError):
        upload_dir = os.path.join('media', str(uuid.uuid4()))
        ruleset_errors = iati.get_iati_ruleset_errors(
            invalid_data_tree,  # Causes an exception in ruleset checks
            os.path.join(upload_dir, 'ruleset'),
            ignore_errors=False,  # Exception not ignored
            return_on_error=return_on_error
        )
def import_workflow_root(workflow, workflow_definition_root, metadata=None, fs=None):
    """Import a parsed Oozie workflow definition into *workflow*.

    Validates the definition's namespace, applies the workflow XSLT, builds
    and saves nodes/links, then updates and saves the workflow. On any
    failure the partially-imported workflow is deleted and the exception is
    re-raised.
    """
    try:
        tag = etree.QName(workflow_definition_root.tag)
        schema_version = tag.namespace

        # Ensure namespace exists
        if schema_version not in OOZIE_NAMESPACES:
            raise RuntimeError(_("Tag with namespace %(namespace)s is not valid. Please use one of the following namespaces: %(namespaces)s") % {
                'namespace': workflow_definition_root.tag,
                'namespaces': ', '.join(OOZIE_NAMESPACES)
            })

        # Get XSLT. BUG FIX: use a context manager and open lazily — the
        # original opened the file first and leaked the handle whenever the
        # namespace check (or anything else) raised before the close.
        xslt_path = "%(xslt_dir)s/workflow.xslt" % {
            'xslt_dir': os.path.join(DEFINITION_XSLT_DIR.get(), 'workflows')
        }
        with open(xslt_path) as xslt_definition_fh:
            xslt = parse(xslt_definition_fh)
        transform = etree.XSLT(xslt)

        # Transform XML using XSLT
        transformed_root = transform(workflow_definition_root)

        # Resolve workflow dependencies and node types and link dependencies
        nodes = _prepare_nodes(workflow, transformed_root)
        _preprocess_nodes(workflow, transformed_root, workflow_definition_root, nodes, fs)
        _save_nodes(workflow, nodes)
        _save_links(workflow, workflow_definition_root)
        _assign_workflow_properties(workflow, workflow_definition_root, schema_version)
        if metadata:
            _process_metadata(workflow, metadata)

        # Update workflow attributes
        workflow.schema_version = schema_version
        workflow.name = workflow_definition_root.get('name')
        workflow.save()
    except:
        # Intentionally broad: roll back the partially-imported workflow,
        # then re-raise whatever went wrong.
        workflow.delete(skip_trash=True)
        raise
def import_workflow(workflow, workflow_definition, fs=None):
    """Import an Oozie workflow definition string into *workflow*.

    Parses the XML, verifies its namespace against the supported Oozie
    namespaces, applies the workflow XSLT, then builds and saves the
    workflow's nodes and links.

    :raises RuntimeError: when the definition is empty/malformed or uses an
        unsupported namespace.
    """
    # Parse Workflow Definition
    workflow_definition_root = fromstring(workflow_definition)
    if workflow_definition_root is None:
        raise RuntimeError(_("Could not find any nodes in Workflow definition. Maybe it's malformed?"))

    ns = workflow_definition_root.tag[:-12]  # Remove workflow-app from tag in order to get proper namespace prefix
    schema_version = ns and ns[1:-1] or None

    # Ensure namespace exists
    if schema_version not in OOZIE_NAMESPACES:
        raise RuntimeError(_("Tag with namespace %(namespace)s is not valid. Please use one of the following namespaces: %(namespaces)s") % {
            'namespace': workflow_definition_root.tag,
            'namespaces': ', '.join(OOZIE_NAMESPACES)
        })

    # Get XSLT. BUG FIX: open lazily with a context manager — the original
    # opened the file first and leaked the handle on both error paths above.
    with open("%(xslt_dir)s/workflow.xslt" % {'xslt_dir': DEFINITION_XSLT_DIR.get()}) as xslt_definition_fh:
        xslt = parse(xslt_definition_fh)
    transform = etree.XSLT(xslt)

    # Transform XML using XSLT
    transformed_root = transform(workflow_definition_root)

    # Resolve workflow dependencies and node types and link dependencies
    nodes = _prepare_nodes(workflow, transformed_root)
    _preprocess_nodes(workflow, transformed_root, workflow_definition_root, nodes, fs)
    _save_nodes(workflow, nodes)
    _save_links(workflow, workflow_definition_root)

    # Update schema_version
    workflow.schema_version = schema_version
    workflow.save()
def _resolve_subworkflow_from_deployment_dir(fs, workflow, app_path):
    """
    Resolves subworkflow in a subworkflow node

    Looks at path and interrogates all workflows until the proper deployment path is found.
    If the proper deployment path is never found, then
    """
    if not fs:
        raise RuntimeError(_("No hadoop file system to operate on."))

    # Normalise the path: drop a trailing slash and the hdfs:// scheme prefix.
    if app_path.endswith('/'):
        app_path = app_path[:-1]
    if app_path.startswith('hdfs://'):
        app_path = app_path[7:]

    try:
        f = fs.open('%s/workflow.xml' % app_path)
        root = parse(f)
        f.close()
        return Workflow.objects.get(name=root.attrib['name'], owner=workflow.owner, managed=True)
    except IOError:
        # Deployment dir without a readable workflow.xml: fall through.
        pass
    except (KeyError, AttributeError):
        raise RuntimeError(_("Could not find workflow name when resolving subworkflow."))
def testDOS_entitySize(self):
    """Oversized entity declarations must be refused, not expanded."""
    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/dos/dos_entitySize.xml')
def testDOS_indirections(self):
    """Deeply indirected entity expansion (billion laughs variant) must fail."""
    with self.assertRaises(XMLSyntaxError):
        _LXML.parse('../../xml_files_windows/dos/dos_indirections.xml')
def testDOS_core(self):
    """The core entity-expansion DoS payload must be rejected."""
    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/dos/dos_core.xml')
def testDefault_noAttack(self):
    """A benign document parses normally under the default settings."""
    root = _LXML.parse('../../xml_files_windows/standard.xml').getroot()
    self.assertIn("4", root.text)
def _load_tree(self, addon_path):
    """Yield an XMLDocument parsed from *addon_path* when it is an .xml file.

    Non-XML paths yield nothing.
    """
    if addon_path.path.suffix.lower() != ".xml":
        return
    with addon_path.path.open(mode="rb") as f:
        root_node = lxml.parse(f).getroot()
        yield XMLDocument(addon_path.addon, addon_path.path, root_node)
def parse_bytes(xml_bytes):
    """Parse raw XML bytes into an element tree."""
    buffer = io.BytesIO(xml_bytes)
    return parse(buffer)
def testParameterEntity_core(self):
    """A core parameter-entity payload must be rejected."""
    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/xxep/parameterEntity_core.xml')
def get_root(xml_filename: pathlib.Path):
    """Return the root element of the XML document at *xml_filename*."""
    with xml_filename.open(mode="rb") as f:
        tree = lxml.parse(f)
    return tree.getroot()
def common_checks_context_iati(context, upload_dir, data_file, file_type, api=False):
    """Validate IATI data against the activity schema, run ruleset checks and
    merge the results into *context*.

    :param context: dict to update in place (also returned).
    :param upload_dir: working directory for cached validation artefacts.
    :param data_file: path to the submitted data file.
    :param file_type: 'xml' or a converted tabular type.
    :param api: when True, skip UI-only context keys and file caching.
    :raises CoveInputDataError: when the file is not well-formed XML.
    """
    iati_schema = SchemaIATI()
    lxml_errors = {}
    cell_source_map = {}
    validation_errors_path = os.path.join(upload_dir, 'validation_errors-2.json')

    with open(data_file) as fp, open(iati_schema.activity_schema) as schema_fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as exc:
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('We think you tried to upload a XML file, but it is not well formed XML.'
                             '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                             '</span> <strong>Error message:</strong> {}'.format(exc)),
                    'error': format(exc)
                })
        schema_tree = etree.parse(schema_fp)
        schema = lxml.etree.XMLSchema(schema_tree)
        schema.validate(tree)
        lxml_errors = lxml_errors_generator(schema.error_log)

    ruleset_errors = get_ruleset_errors(tree, os.path.join(upload_dir, 'ruleset'))
    errors_all = format_lxml_errors(lxml_errors)

    if file_type != 'xml':
        with open(os.path.join(upload_dir, 'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)

    # Reuse cached validation errors when present; otherwise compute and
    # (outside API mode) cache them.
    if os.path.exists(validation_errors_path):
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type, cell_source_map)
        if not api:
            with open(validation_errors_path, 'w+') as validation_error_fp:
                validation_error_fp.write(json.dumps(validation_errors))

    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors,
    })
    if not api:
        context.update({
            'validation_errors_count': sum(len(value) for value in validation_errors.values()),
            'ruleset_errors_count': len(ruleset_errors),
            'cell_source_map': cell_source_map,
            'first_render': False
        })
    return context
def load_thesaurus(self, input_file, name, store):
    """Load a SKOS RDF thesaurus from *input_file* and optionally persist it.

    Creates a Thesaurus identified by *name* along with its labels, keywords
    and keyword labels. Thesaurus and keyword objects are saved only when
    *store* is true.
    """
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'
    ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
    LANG_ATTRIB = f"{{{XML_URI}}}lang"
    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#'
    }
    tfile = dlxml.parse(input_file)
    root = tfile.getroot()
    scheme = root.find('skos:ConceptScheme', ns)
    # BUG FIX: truth-testing an lxml Element reflects whether it has child
    # elements, so "if not scheme" wrongly rejected a present-but-childless
    # scheme. Test for None explicitly.
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")
    titles = scheme.findall('dc:title', ns)
    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
    available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
    thesaurus_title = determinate_value(available_lang, default_lang)
    # BUG FIX: a dc:description element that holds only text has no children
    # and is therefore falsy, so the original conditional always fell back to
    # the title even when a description existed. Compare against None.
    descr_el = scheme.find('dc:description', ns)
    descr = descr_el.text if descr_el is not None else thesaurus_title
    date_issued = scheme.find('dcterms:issued', ns).text
    about = scheme.attrib.get(ABOUT_ATTRIB)
    print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')
    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = thesaurus_title
    thesaurus.description = descr
    thesaurus.about = about
    thesaurus.date = date_issued
    if store:
        thesaurus.save()

    for lang in available_lang:
        if lang[0] is not None:
            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = lang[0]
            thesaurus_label.label = lang[1]
            thesaurus_label.thesaurus = thesaurus
            # NOTE(review): labels are saved unconditionally, unlike the
            # thesaurus/keyword objects which respect `store` — confirm intended.
            thesaurus_label.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns)
        if alt_label is not None:
            alt_label = alt_label.text
        else:
            # Fall back to the best prefLabel for the default language.
            concepts = concept.findall('skos:prefLabel', ns)
            available_lang = get_all_lang_available_with_title(concepts, LANG_ATTRIB)
            alt_label = determinate_value(available_lang, default_lang)
        print(f'Concept {alt_label} ({about})')
        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label
        if store:
            tk.save()
        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text
            print(f' Label {lang}: {label}')
            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label
            if store:
                tkl.save()
def build_tree(self, content):  # pylint: disable=no-self-use
    """Uses defusedxml to parse the response into ElementTree"""
    stream = BytesIO(content)
    return parse(stream)
exit(0) path = keywords[0] # Support for searching with absolute path if len(path) > 1 and path[0] == '/' and path[1] != '/': path = '/data' + path results, dummyresults, settings = si.getOrganizedResults() # for each results for result in results: # get field value myxml = result.get(field, None) added = False if myxml != None: # make event value valid xml myxml = "<data>%s</data>" % myxml try: et = safe_lxml.parse(StringIO(myxml)) nodes = et.xpath(path) values = [tostr(node) for node in nodes] result[outfield] = values added = True except Exception as e: pass # consider throwing exception and explain path problem if not added and defaultval != None: result[outfield] = defaultval si.outputResults(results) except Exception as e: import traceback stack = traceback.format_exc() si.generateErrorResults("Error '%s'. %s" % (e, stack))
def from_file(cls, filename=None, fileobj=None, *args, **kwargs):
    """Build an instance from *filename* or an already-open *fileobj*.

    When no file object is supplied, *filename* is opened in binary mode.
    """
    fileobj = fileobj or open(filename, "rb")
    kwargs.update(filename=filename, root=lxml.parse(fileobj))
    return cls.from_element(*args, **kwargs)
def testXXE(self):
    """An external-entity (XXE) payload must be rejected."""
    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/xxe/xxe.xml')
def testInternalSubset_PEReferenceInDTD(self):
    """Parameter-entity references inside the internal DTD subset must be rejected."""
    with self.assertRaises(EntitiesForbidden):
        _LXML.parse('../../xml_files_windows/xxep/internalSubset_PEReferenceInDTD.xml')
import defusedxml.lxml as _LXML

# Parse a known-good document and show its root tag and text.
tree = _LXML.parse("../../xml_files_windows/standard.xml")
root = tree.getroot()
print(root.tag)
print(root.text)
def testParameterEntity_doctype(self):
    """A parameter entity declared in the DOCTYPE must not be expanded into content."""
    tree = _LXML.parse('../../xml_files_windows/xxep/parameterEntity_doctype.xml')
    root = tree.getroot()
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # assertIsNone states the intent directly.
    self.assertIsNone(root.text)
def do_parse_format(self, fmt_path, fmt_file, pixbuf=False, indexed=False):
    """
    Parse the format file for the RAMSTKTreeView().

    :param str fmt_path: the base XML path in the format file to read.
    :param str fmt_file: the absolute path to the format file to read.
    :keyword bool pixbuf: indicates whether or not to prepend a PixBuf
                          column to the gtk.TreeModel().
    :keyword bool indexed: indicates whether or not to append a column to
                           the gtk.TreeModel() to hold indexing
                           information.
    :return: None
    :rtype: None
    """
    # PERF FIX: parse the format file once and reuse the tree; the original
    # re-opened and re-parsed the same file for every field (eight times).
    _format = lxml.parse(fmt_file)

    # Retrieve the column heading text from the format file.
    self.headings = _format.xpath(fmt_path + "/usertitle")

    # Retrieve the column datatype from the format file.
    self.datatypes = _format.xpath(fmt_path + "/datatype")

    # Retrieve the column position from the format file.
    _position = _format.xpath(fmt_path + "/position")

    # Retrieve the cell renderer type from the format file.
    self.widgets = _format.xpath(fmt_path + "/widget")

    # Retrieve whether or not the column is editable from the format file.
    self.editable = _format.xpath(fmt_path + "/editable")

    # Retrieve whether or not the column is visible from the format file.
    self.visible = _format.xpath(fmt_path + "/visible")

    # Initialize public scalar instance attributes.
    _keys = _format.xpath(fmt_path + "/key")

    # Create a list of GObject datatypes to pass to the model.
    for i in range(len(self.datatypes)):  # pylint: disable=C0200
        self.datatypes[i] = self.datatypes[i].text
        self.editable[i] = int(self.editable[i].text)
        self.headings[i] = self.headings[i].text.replace(" ", "\n")
        self.order.append(int(_position[i].text))
        self.visible[i] = int(self.visible[i].text)
        self.widgets[i] = self.widgets[i].text
        _position[i] = int(_position[i].text)

        # Not all format files will have keys.
        try:
            _keys[i] = _keys[i].text
        except IndexError:
            pass

    # Append entries to each list if this RAMSTKTreeView is to display an
    # icon at the beginning of the row (Usage Profile, Hardware, etc.)
    if pixbuf:
        self.datatypes.append('pixbuf')
        self.editable.append(0)
        self.headings.append('')
        self.order.append(len(self.order))
        self.pixbuf_col = int(len(self.datatypes)) - 1
        self.visible.append(1)
        self.widgets.append('pixbuf')

    # We may want to add a column to hold indexing information for program
    # control. This is used, for example, by aggregate data views to hold
    # the Node ID from the PyPubSub Tree().
    if indexed:
        self.datatypes.append('gchararray')
        self.editable.append(0)
        self.headings.append('')
        self.order.append(len(self.order))
        self.visible.append(0)
        self.widgets.append('text')
        self.index_col = int(len(self.datatypes)) - 1

    # Sort each of the lists according to the desired sequence provided in
    # the _position list. This is necessary to allow for user-specific
    # ordering of columns in the RAMSTKTreeView.
    self.datatypes = [
        x for _, x in sorted(zip(self.order, self.datatypes))
    ]
    self.editable = [x for _, x in sorted(zip(self.order, self.editable))]
    self.headings = [x for _, x in sorted(zip(self.order, self.headings))]
    self.korder = [x for _, x in sorted(zip(_position, _keys))]
    self.visible = [x for _, x in sorted(zip(self.order, self.visible))]
    self.widgets = [x for _, x in sorted(zip(self.order, self.widgets))]

    return None
def from_file(cls, path):
    """Construct an instance by parsing the document at *path*."""
    parsed = parse(path)
    return cls(parsed)
def common_checks_context_iati(context, upload_dir, data_file, file_type, api=False, openag=False, orgids=False):
    '''Validate IATI data against the appropriate schema, run ruleset checks
    and merge everything into *context*.

    TODO: this function is trying to do too many things. Separate some of its
    logic into smaller functions doing one single thing each.
    '''
    schema_iati = SchemaIATI()
    cell_source_map = {}
    validation_errors_path = os.path.join(upload_dir, 'validation_errors-3.json')

    with open(data_file, 'rb') as fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as exc:
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(
                        format_html(
                            'We think you tried to upload a XML file, but it is not well formed XML.'
                            '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                            '</span> <strong>Error message:</strong> {}', exc)),
                    'error': format(exc)
                })
        except UnicodeDecodeError as exc:
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(
                        format_html(
                            'We think you tried to upload a XML file, but the encoding is incorrect.'
                            '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                            '</span> <strong>Error message:</strong> {}', exc)),
                    'error': format(exc)
                })

    # Pick the schema matching the document type.
    if tree.getroot().tag == 'iati-organisations':
        schema_path = schema_iati.organisation_schema
        schema_name = 'Organisation'
        # rulesets don't support orgnisation files properly yet
        # so disable rather than give partial information
        ruleset_disabled = True
    else:
        schema_path = schema_iati.activity_schema
        schema_name = 'Activity'
        ruleset_disabled = False

    errors_all, invalid_data = validate_against_schema(schema_path, tree)
    return_on_error = [{
        'message': 'There was a problem running ruleset checks',
        'exception': True
    }]

    # Validation errors
    if file_type != 'xml':
        with open(os.path.join(upload_dir, 'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)

    if os.path.exists(validation_errors_path):
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type, cell_source_map)
        if not api:
            with open(validation_errors_path, 'w+') as validation_error_fp:
                validation_error_fp.write(json.dumps(validation_errors))

    # Ruleset errors
    if ruleset_disabled:
        ruleset_errors = None
    else:
        ruleset_errors = get_iati_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset'),
            api=api,
            ignore_errors=invalid_data,
            return_on_error=return_on_error)

    if openag:
        ruleset_errors_ag = get_openag_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_openang'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_openag': ruleset_errors_ag})
    if orgids:
        ruleset_errors_orgids = get_orgids_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_orgids'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_orgids': ruleset_errors_orgids})

    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors
    })
    if not api:
        context.update({
            'validation_errors_count': sum(len(value) for value in validation_errors.values()),
            'cell_source_map': cell_source_map,
            'first_render': False,
            'schema_name': schema_name,
            'ruleset_disabled': ruleset_disabled
        })
        if ruleset_errors:
            ruleset_errors_by_activity = get_iati_ruleset_errors(
                tree,
                os.path.join(upload_dir, 'ruleset'),
                group_by='activity',
                ignore_errors=invalid_data,
                return_on_error=return_on_error)
            context['ruleset_errors'] = [ruleset_errors, ruleset_errors_by_activity]
        count_ruleset_errors = 0
        if isinstance(ruleset_errors, dict):
            for rules in ruleset_errors.values():
                for errors in rules.values():
                    count_ruleset_errors += len(errors)
        context['ruleset_errors_count'] = count_ruleset_errors
    return context
def load_thesaurus(self, input_file, name, store):
    """Load a SKOS RDF thesaurus from *input_file*, creating a Thesaurus named
    *name* plus its keywords and keyword labels; persist only when *store* is
    true."""
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'
    ABOUT_ATTRIB = '{' + RDF_URI + '}about'
    LANG_ATTRIB = '{' + XML_URI + '}lang'
    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#'
    }
    tfile = dlxml.parse(input_file)
    root = tfile.getroot()

    scheme = root.find('skos:ConceptScheme', ns)
    if not scheme:
        raise CommandError("ConceptScheme not found in file")

    title = scheme.find('dc:title', ns).text
    descr = scheme.find('dc:description', ns).text
    date_issued = scheme.find('dcterms:issued', ns).text
    print('Thesaurus "{}" issued on {}'.format(title, date_issued))

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = title
    thesaurus.description = descr
    thesaurus.date = date_issued
    if store:
        thesaurus.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns).text
        print('Concept {} ({})'.format(alt_label, about))

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label
        if store:
            tk.save()

        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text
            print(u' Label {}: {}'.format(lang, label))

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label
            if store:
                tkl.save()
def from_file(cls, filename=None, fileobj=None, *args, **kwargs):
    """Build an instance from *filename* or an already-open *fileobj*.

    When no file object is supplied, *filename* is opened in binary mode.
    """
    if not fileobj:
        fileobj = open(filename, 'rb')
    kwargs['filename'] = filename
    kwargs['root'] = lxml.parse(fileobj)
    return cls.from_element(*args, **kwargs)