def get_schemacomponents(self):
    """Return schema components as lxml.etree.Element list.

    One ``csw:SchemaComponent`` element is built per bundled schema file
    (``gmd/identification.xsd`` then ``srv/serviceMetadata.xsd``); the root
    element of the parsed schema is appended as its child.
    """
    schema_dir = os.path.join(
        self.context.pycsw_home, 'plugins', 'profiles', 'apiso',
        'schemas', 'ogc', 'iso', '19139', '20060504')

    components = []
    for subdir, filename in (('gmd', 'identification.xsd'),
                             ('srv', 'serviceMetadata.xsd')):
        component = etree.Element(
            util.nspath_eval('csw:SchemaComponent',
                             self.context.namespaces),
            schemaLanguage='XMLSCHEMA',
            targetNamespace=self.namespace,
            parentSchema='gmd.xsd')

        xsd_path = os.path.join(schema_dir, subdir, filename)
        xsd_root = etree.parse(xsd_path, self.context.parser).getroot()

        component.append(xsd_root)
        components.append(component)

    return components
def _render_xslt(self, res):
    """Validate and render XSLT.

    Builds the lookup key ``xslt:<res.schema>,<outputschema>`` and searches
    ``self.xslts`` (an iterable of dict-like entries) for the first entry
    containing that key. The value stored under the key is parsed as the
    stylesheet and applied to ``res.xml``.

    Returns:
        The root element of the transformed document, or ``None`` when no
        entry in ``self.xslts`` matches the key.

    Raises:
        RuntimeError: if anything fails while resolving or applying the
            transformation; the original exception is attached as the
            cause.
    """
    LOGGER.debug('Rendering XSLT')

    try:
        input_os = res.schema
        output_os = self.kvp['outputschema']
        xslt_id = 'xslt:%s,%s' % (input_os, output_os)

        # A default avoids using StopIteration as control flow, so an
        # unrelated StopIteration further down is not mistaken for
        # "no stylesheet configured".
        xslt_dict = next((d for d in self.xslts if xslt_id in d), None)
        if xslt_dict is None:
            LOGGER.debug('No matching XSLT found')
            return None

        LOGGER.debug('XSLT ID: %s', xslt_id)
        LOGGER.debug('Found matching XSLT transformation')

        xslt = xslt_dict[xslt_id]
        transform = etree.XSLT(etree.parse(xslt))
        doc = etree.fromstring(res.xml, self.context.parser)
        return transform(doc).getroot()
    except Exception as err:
        LOGGER.warning('XSLT transformation failed: %s', err)
        # Carry the message and the cause so callers catching RuntimeError
        # can report why the transformation failed.
        raise RuntimeError('XSLT transformation failed: %s' % err) from err
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from directory of files to database.

    Args:
        context: object providing the ``parser`` used to read each file;
            also passed to the repository and the record parser.
        database: database identifier handed to ``repository.Repository``.
        table: table name handed to ``repository.Repository``.
        xml_dirpath: path to a single file, or to a directory that is
            searched for ``*.xml`` files.
        recursive: when ``xml_dirpath`` is a directory, walk it recursively
            instead of only globbing its top level.
        force_update: when an insert raises ``RuntimeError``, update the
            record instead of only logging the failure.

    Files that cannot be parsed as XML are logged and skipped.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [
            os.path.join(root, mfile)
            for root, _dirs, files in os.walk(xml_dirpath)
            for mfile in files
            if mfile.endswith('.xml')
        ]
    else:
        file_list = glob(os.path.join(xml_dirpath, '*.xml'))

    total = len(file_list)

    for counter, recfile in enumerate(sorted(file_list), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, counter, total)

        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning.
            LOGGER.warning('XML document is not well-formed: %s', str(err))
            continue

        record = metadata.parse_record(context, exml, repo)

        for rec in record:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                LOGGER.info('Inserted')
            except RuntimeError as err:
                if force_update:
                    # A RuntimeError from insert is treated as
                    # "record already exists".
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated')
                else:
                    LOGGER.warning('ERROR: not inserted %s', err)
def validate_xml(xml, xsd):
    """Validate XML document against XML Schema.

    Args:
        xml: XML document source accepted by ``etree.parse``.
        xsd: XML Schema file passed to ``etree.XMLSchema(file=...)``.

    Returns:
        The string ``'Valid'`` when the document parses and conforms to
        the schema.

    Raises:
        RuntimeError: if the document cannot be parsed or does not conform
            to the schema; the underlying exception is chained as the
            cause.
    """
    LOGGER.info('Validating %s against schema %s', xml, xsd)

    schema = etree.XMLSchema(file=xsd)

    try:
        doc = etree.parse(xml, PARSER)
        # Previously the compiled schema was never applied, so any
        # well-formed document was reported as valid. assertValid raises
        # when the document violates the schema.
        schema.assertValid(doc)
    except Exception as err:
        LOGGER.exception('Invalid XML')
        raise RuntimeError('ERROR: %s' % str(err)) from err

    return 'Valid'
def validate_xml(xml, xsd):
    """Validate XML document against XML Schema.

    The document is parsed with a parser bound to the schema (entity
    resolution disabled), so schema violations surface as parse errors.

    Args:
        xml: XML document source accepted by ``etree.parse``.
        xsd: XML Schema file passed to ``etree.XMLSchema(file=...)``.

    Returns:
        The string ``'Valid'`` when parsing succeeds.

    Raises:
        RuntimeError: if parsing/validation fails; the underlying
            exception is chained as the cause.
    """
    LOGGER.info('Validating %s against schema %s', xml, xsd)

    schema = etree.XMLSchema(file=xsd)
    parser = etree.XMLParser(schema=schema, resolve_entities=False)

    try:
        # Only the success/failure of the parse matters; the tree itself
        # is not needed.
        etree.parse(xml, parser)
    except Exception as err:
        # Chain the cause so the original lxml error stays attached.
        raise RuntimeError('ERROR: %s' % str(err)) from err

    return 'Valid'
def validate_xml(xml, xsd):
    """Validate XML document against XML Schema.

    Args:
        xml: XML document source accepted by ``etree.parse``.
        xsd: XML Schema file passed to ``etree.XMLSchema(file=...)``.

    Returns:
        The string ``'Valid'`` when the document parses and conforms to
        the schema.

    Raises:
        RuntimeError: if the document cannot be parsed or does not conform
            to the schema; the underlying exception is chained as the
            cause.
    """
    LOGGER.info('Validating %s against schema %s', xml, xsd)

    schema = etree.XMLSchema(file=xsd)

    try:
        doc = etree.parse(xml, PARSER)
        # The schema object used to be built but never consulted, which
        # made every well-formed document "valid". assertValid raises when
        # the document violates the schema.
        schema.assertValid(doc)
    except Exception as err:
        LOGGER.exception('Invalid XML')
        raise RuntimeError('ERROR: %s' % str(err)) from err

    return 'Valid'
def get_schemacomponents(self):
    """Return schema components as lxml.etree.Element list.

    Builds a single ``csw:SchemaComponent`` element and appends the root
    of the bundled ``csw-ebrim.xsd`` schema as its child.
    """
    node = etree.Element(
        util.nspath_eval('csw:SchemaComponent', self.context.namespaces),
        schemaLanguage='XMLSCHEMA',
        targetNamespace=self.namespace)

    schema_file = os.path.join(
        self.context.pycsw_home, 'plugins', 'profiles', 'ebrim',
        'schemas', 'ogc', 'csw', '2.0.2', 'profiles', 'ebrim', '1.0',
        'csw-ebrim.xsd')

    # Parse with the context's configured parser, as the other
    # get_schemacomponents implementations do, rather than lxml's default.
    # NOTE(review): assumes self.context.parser is available here as it is
    # for the sibling profiles -- confirm.
    schema = etree.parse(schema_file, self.context.parser).getroot()

    node.append(schema)

    return [node]
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from directory of files to database.

    Args:
        context: object providing the ``parser`` used to read each file;
            also passed to the repository and the record parser.
        database: database identifier handed to ``repository.Repository``.
        table: table name handed to ``repository.Repository``.
        xml_dirpath: path to a single file, or to a directory that is
            searched for ``*.xml`` files.
        recursive: when ``xml_dirpath`` is a directory, walk it recursively
            instead of only globbing its top level.
        force_update: when an insert raises ``RuntimeError``, update the
            record instead of only logging the failure.

    Files that cannot be parsed as XML are logged and skipped.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [
            os.path.join(root, mfile)
            for root, _dirs, files in os.walk(xml_dirpath)
            for mfile in files
            if mfile.endswith(".xml")
        ]
    else:
        file_list = glob(os.path.join(xml_dirpath, "*.xml"))

    total = len(file_list)

    for counter, recfile in enumerate(sorted(file_list), start=1):
        LOGGER.info("Processing file %s (%d of %d)", recfile, counter, total)

        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning.
            LOGGER.warning("XML document is not well-formed: %s", str(err))
            continue

        record = metadata.parse_record(context, exml, repo)

        for rec in record:
            LOGGER.info(
                "Inserting %s %s into database %s, table %s ....",
                rec.typename, rec.identifier, database, table
            )

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, "local", util.get_today_and_now())
                LOGGER.info("Inserted")
            except RuntimeError as err:
                if force_update:
                    # A RuntimeError from insert is treated as
                    # "record already exists".
                    LOGGER.info("Record exists. Updating.")
                    repo.update(rec)
                    LOGGER.info("Updated")
                else:
                    LOGGER.warning("ERROR: not inserted %s", err)
def get_schemacomponents(self):
    """Return schema components as lxml.etree.Element list.

    Builds a single ``csw:SchemaComponent`` element and appends the root
    of the bundled ``csw-ebrim.xsd`` schema as its child.
    """
    node = etree.Element(
        util.nspath_eval('csw:SchemaComponent', self.context.namespaces),
        schemaLanguage='XMLSCHEMA',
        targetNamespace=self.namespace)

    schema_file = os.path.join(
        self.context.pycsw_home, 'plugins', 'profiles', 'ebrim',
        'schemas', 'ogc', 'csw', '2.0.2', 'profiles', 'ebrim', '1.0',
        'csw-ebrim.xsd')

    # Use the context's configured parser for consistency with the other
    # get_schemacomponents implementations instead of lxml's default.
    # NOTE(review): assumes self.context.parser is available here as it is
    # for the sibling profiles -- confirm.
    schema = etree.parse(schema_file, self.context.parser).getroot()

    node.append(schema)

    return [node]
def get_schemacomponents(self):
    """Return schema components as lxml.etree.Element list.

    Each entry of ``self.schemas_paths`` is a sequence of path segments
    relative to ``self.context.pycsw_home``. For every entry a
    ``csw:SchemaComponent`` element is created that wraps the root of the
    parsed schema file.
    """
    def _build_component(path_parts):
        # Create the wrapper element first, then load and attach the schema.
        component = etree.Element(
            util.nspath_eval('csw:SchemaComponent',
                             self.context.namespaces),
            schemaLanguage='XMLSCHEMA',
            targetNamespace=self.namespace)
        xsd_path = os.path.join(self.context.pycsw_home, *path_parts)
        xsd_root = etree.parse(xsd_path, self.context.parser).getroot()
        component.append(xsd_root)
        return component

    return [_build_component(parts) for parts in self.schemas_paths]
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from directory of files to database.

    ``xml_dirpath`` may be a single file or a directory; directories are
    searched for ``*.xml`` files (recursively when ``recursive`` is set).
    Files that fail to parse, or that cannot be turned into records, are
    logged and skipped. When an insert raises and ``force_update`` is set,
    the record is updated instead; otherwise the failure is logged.

    Returns:
        A tuple of the file paths for which at least one record was
        inserted or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    loaded_files = set()

    # Collect the candidate files.
    if os.path.isfile(xml_dirpath):
        candidates = [xml_dirpath]
    elif recursive:
        candidates = [
            os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk(xml_dirpath)
            for name in names
            if name.endswith('.xml')
        ]
    else:
        candidates = list(glob(os.path.join(xml_dirpath, '*.xml')))

    total = len(candidates)

    for position, recfile in enumerate(sorted(candidates), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, position, total)

        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed', recfile,
                         exc_info=True)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            records = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in records:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                loaded_files.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if not force_update:
                    # Pull a decent database error message and not the full
                    # SQL that was run since INSERT SQL statements are
                    # rather large.
                    if isinstance(err, DBAPIError) and err.args:
                        detail = err.args[0]
                    else:
                        detail = err
                    LOGGER.error('ERROR: %s not inserted: %s', recfile,
                                 detail, exc_info=True)
                    continue

                LOGGER.info('Record exists. Updating.')
                repo.update(rec)
                LOGGER.info('Updated %s', recfile)
                loaded_files.add(recfile)

    return tuple(loaded_files)
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from directory of files to database.

    ``xml_dirpath`` may be a single file or a directory; directories are
    searched for ``*.xml`` files (recursively when ``recursive`` is set).
    Files that fail to parse, or that cannot be turned into records, are
    logged and skipped. When an insert raises and ``force_update`` is set,
    the record is updated instead; otherwise the failure is logged.

    Returns:
        A tuple of the file paths for which at least one record was
        inserted or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    loaded_files = set()

    # Collect the candidate files.
    if os.path.isfile(xml_dirpath):
        candidates = [xml_dirpath]
    elif recursive:
        candidates = [
            os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk(xml_dirpath)
            for name in names
            if name.endswith('.xml')
        ]
    else:
        candidates = list(glob(os.path.join(xml_dirpath, '*.xml')))

    total = len(candidates)

    for position, recfile in enumerate(sorted(candidates), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, position, total)

        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed', recfile)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            records = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in records:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                loaded_files.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if not force_update:
                    # Pull a decent database error message and not the full
                    # SQL that was run since INSERT SQL statements are
                    # rather large.
                    if isinstance(err, DBAPIError) and err.args:
                        detail = err.args[0]
                    else:
                        detail = err
                    LOGGER.error('ERROR: %s not inserted: %s', recfile,
                                 detail)
                    continue

                LOGGER.info('Record exists. Updating.')
                repo.update(rec)
                LOGGER.info('Updated %s', recfile)
                loaded_files.add(recfile)

    return tuple(loaded_files)
elif os.path.isdir(xml_dirpath): files_names = [ os.path.join(xml_dirpath, f) for f in os.listdir(xml_dirpath) ] else: print('Undefined xml files path in command line input') sys.exit(1) if not catalog_slug: print('Undefined catalog slug in command line input') sys.exit(1) # Create repository object with catalog slug. context = config.StaticContext() repo = RegistryRepository(PYCSW['repository']['database'], context, table=PYCSW['repository']['table']) repo.catalog = catalog_slug # Create index with mapping in Elasticsarch. create_index(catalog_slug) # Parse each xml file and insert records. for xml_file in files_names: parsed_xml = etree.parse(xml_file, context.parser) load_records(repo, parsed_xml, context) sys.exit(0) management.execute_from_command_line()