Beispiel #1
0
    def get_schemacomponents(self):
        ''' Build one csw:SchemaComponent element per bundled ISO 19139 schema.

        Each component wraps the root element of an XSD file found under
        <pycsw_home>/plugins/profiles/apiso/schemas/ogc/iso/19139/20060504.

        Returns a two-item list of lxml.etree.Element objects: the component
        for gmd/identification.xsd first, then srv/serviceMetadata.xsd.
        '''

        schema_dir = os.path.join(self.context.pycsw_home, 'plugins',
                                  'profiles', 'apiso', 'schemas', 'ogc',
                                  'iso', '19139', '20060504')

        components = []

        # (sub-directory, file name) of every schema to expose, in output order
        for package, xsd_name in (('gmd', 'identification.xsd'),
                                  ('srv', 'serviceMetadata.xsd')):
            component = etree.Element(
                util.nspath_eval('csw:SchemaComponent',
                                 self.context.namespaces),
                schemaLanguage='XMLSCHEMA', targetNamespace=self.namespace,
                parentSchema='gmd.xsd')

            xsd_path = os.path.join(schema_dir, package, xsd_name)
            xsd_root = etree.parse(xsd_path, self.context.parser).getroot()

            component.append(xsd_root)
            components.append(component)

        return components
Beispiel #2
0
    def _render_xslt(self, res):
        ''' Validate and render XSLT

        Looks in self.xslts for a dict holding the key
        'xslt:<res.schema>,<self.kvp["outputschema"]>' and applies the
        stylesheet stored under that key to res.xml.

        Returns the root element of the transformed document, or None when
        no entry of self.xslts contains the key.

        Raises RuntimeError (chained to the original exception) when the
        lookup, parsing or transformation fails.
        '''

        LOGGER.debug('Rendering XSLT')
        try:
            xslt_id = 'xslt:%s,%s' % (res.schema, self.kvp['outputschema'])

            # A default of None keeps "no match" separate from a
            # StopIteration raised later during parsing or transformation.
            xslt_dict = next((d for d in self.xslts if xslt_id in d), None)
            if xslt_dict is None:
                LOGGER.debug('No matching XSLT found')
                return None

            LOGGER.debug('XSLT ID: %s', xslt_id)
            LOGGER.debug('Found matching XSLT transformation')

            transform = etree.XSLT(etree.parse(xslt_dict[xslt_id]))
            doc = etree.fromstring(res.xml, self.context.parser)
            return transform(doc).getroot()
        except Exception as err:
            LOGGER.warning('XSLT transformation failed: %s', err)
            # Carry the message and cause so callers can tell what failed.
            raise RuntimeError('XSLT transformation failed: %s' % err) from err
def load_records(context,
                 database,
                 table,
                 xml_dirpath,
                 recursive=False,
                 force_update=False):
    """Load metadata records from directory of files to database

    xml_dirpath may be a single file, which is loaded as is, or a
    directory. For a directory, '*.xml' files directly inside it are
    loaded, or every file ending in '.xml' below it when recursive is true.

    Files that cannot be parsed by etree.parse are logged and skipped.
    When repo.insert raises RuntimeError the record is passed to
    repo.update if force_update is true, otherwise the failure is logged.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [os.path.join(root, mfile)
                     for root, _dirs, files in os.walk(xml_dirpath)
                     for mfile in files
                     if mfile.endswith('.xml')]
    else:
        file_list = glob(os.path.join(xml_dirpath, '*.xml'))

    total = len(file_list)

    # sorted() gives a deterministic processing order
    for counter, recfile in enumerate(sorted(file_list), 1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, counter, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning
            LOGGER.warning('XML document is not well-formed: %s', str(err))
            continue

        record = metadata.parse_record(context, exml, repo)

        for rec in record:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                LOGGER.info('Inserted')
            except RuntimeError as err:
                if force_update:
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated')
                else:
                    LOGGER.warning('ERROR: not inserted %s', err)
Beispiel #4
0
def validate_xml(xml, xsd):
    """Validate XML document against XML Schema

    Parses xml with the module-level PARSER, then checks the resulting
    tree against the schema loaded from xsd.

    Returns the string 'Valid' on success.

    Raises RuntimeError (chained to the original exception) when the
    document cannot be parsed or does not conform to the schema.
    """

    LOGGER.info('Validating %s against schema %s', xml, xsd)

    schema = etree.XMLSchema(file=xsd)

    try:
        doc = etree.parse(xml, PARSER)
        # Nothing in this function attaches `schema` to PARSER, so parsing
        # alone would only prove well-formedness; apply the schema here.
        # NOTE(review): PARSER is defined outside this block - confirm it
        # is not already schema-aware.
        schema.assertValid(doc)
        return 'Valid'
    except Exception as err:
        LOGGER.exception('Invalid XML')
        raise RuntimeError('ERROR: %s' % str(err)) from err
def validate_xml(xml, xsd):
    """Check that an XML document conforms to an XML Schema.

    The schema loaded from xsd is attached to a parser that does not
    resolve entities, so validation happens while xml is parsed.

    Returns the string 'Valid'; raises RuntimeError carrying the
    underlying error text when parsing or validation fails.
    """

    LOGGER.info('Validating %s against schema %s', xml, xsd)

    validating_parser = etree.XMLParser(schema=etree.XMLSchema(file=xsd),
                                        resolve_entities=False)

    try:
        etree.parse(xml, validating_parser)
    except Exception as err:
        raise RuntimeError('ERROR: %s' % str(err))

    return 'Valid'
Beispiel #6
0
def validate_xml(xml, xsd):
    """Parse xml with a schema-validating parser built from xsd.

    Returns 'Valid' when the parse succeeds. Any exception raised while
    parsing is re-raised as RuntimeError with the message
    'ERROR: <original message>'.
    """

    LOGGER.info('Validating %s against schema %s', xml, xsd)

    xsd_schema = etree.XMLSchema(file=xsd)
    xsd_parser = etree.XMLParser(schema=xsd_schema, resolve_entities=False)

    try:
        # the parse result is not needed; only success or failure matters
        etree.parse(xml, xsd_parser)
    except Exception as err:
        raise RuntimeError('ERROR: %s' % str(err))
    else:
        return 'Valid'
Beispiel #7
0
def validate_xml(xml, xsd):
    """Validate XML document against XML Schema

    Parses xml with the module-level PARSER, then checks the resulting
    tree against the schema loaded from xsd.

    Returns the string 'Valid' on success.

    Raises RuntimeError (chained to the original exception) when the
    document cannot be parsed or does not conform to the schema.
    """

    LOGGER.info('Validating %s against schema %s', xml, xsd)

    schema = etree.XMLSchema(file=xsd)

    try:
        doc = etree.parse(xml, PARSER)
        # Nothing in this function attaches `schema` to PARSER, so parsing
        # alone would only prove well-formedness; apply the schema here.
        # NOTE(review): PARSER is defined outside this block - confirm it
        # is not already schema-aware.
        schema.assertValid(doc)
        return 'Valid'
    except Exception as err:
        LOGGER.exception('Invalid XML')
        raise RuntimeError('ERROR: %s' % str(err)) from err
Beispiel #8
0
    def get_schemacomponents(self):
        ''' Wrap the bundled csw-ebrim.xsd in a csw:SchemaComponent element.

        Returns a one-item list holding the lxml.etree.Element whose only
        child is the root of the parsed schema file.
        '''

        component = etree.Element(
            util.nspath_eval('csw:SchemaComponent', self.context.namespaces),
            schemaLanguage='XMLSCHEMA', targetNamespace=self.namespace)

        xsd_path = os.path.join(
            self.context.pycsw_home, 'plugins', 'profiles', 'ebrim',
            'schemas', 'ogc', 'csw', '2.0.2', 'profiles', 'ebrim', '1.0',
            'csw-ebrim.xsd')

        component.append(etree.parse(xsd_path).getroot())

        return [component]
Beispiel #9
0
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from directory of files to database

    xml_dirpath may be a single file, which is loaded as is, or a
    directory. For a directory, "*.xml" files directly inside it are
    loaded, or every file ending in ".xml" below it when recursive is true.

    Files that cannot be parsed by etree.parse are logged and skipped.
    When repo.insert raises RuntimeError the record is passed to
    repo.update if force_update is true, otherwise the failure is logged.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [
            os.path.join(root, mfile)
            for root, _dirs, files in os.walk(xml_dirpath)
            for mfile in files
            if mfile.endswith(".xml")
        ]
    else:
        file_list = glob(os.path.join(xml_dirpath, "*.xml"))

    total = len(file_list)

    # sorted() gives a deterministic processing order
    for counter, recfile in enumerate(sorted(file_list), 1):
        LOGGER.info("Processing file %s (%d of %d)", recfile, counter, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning
            LOGGER.warning("XML document is not well-formed: %s", str(err))
            continue

        record = metadata.parse_record(context, exml, repo)

        for rec in record:
            LOGGER.info(
                "Inserting %s %s into database %s, table %s ....", rec.typename, rec.identifier, database, table
            )

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, "local", util.get_today_and_now())
                LOGGER.info("Inserted")
            except RuntimeError as err:
                if force_update:
                    LOGGER.info("Record exists. Updating.")
                    repo.update(rec)
                    LOGGER.info("Updated")
                else:
                    LOGGER.warning("ERROR: not inserted %s", err)
Beispiel #10
0
    def get_schemacomponents(self):
        ''' Return the ebRIM profile schema as a list of SchemaComponent nodes.

        A single csw:SchemaComponent lxml.etree.Element is created and the
        root element of csw-ebrim.xsd, read from below self.context.pycsw_home,
        is appended to it.
        '''

        tag = util.nspath_eval('csw:SchemaComponent', self.context.namespaces)
        component = etree.Element(tag,
                                  schemaLanguage='XMLSCHEMA',
                                  targetNamespace=self.namespace)

        path_parts = ('plugins', 'profiles', 'ebrim', 'schemas', 'ogc', 'csw',
                      '2.0.2', 'profiles', 'ebrim', '1.0', 'csw-ebrim.xsd')
        xsd_tree = etree.parse(
            os.path.join(self.context.pycsw_home, *path_parts))

        component.append(xsd_tree.getroot())

        return [component]
Beispiel #11
0
    def get_schemacomponents(self):
        ''' Build a csw:SchemaComponent element for each configured schema.

        Every entry of self.schemas_paths is a sequence of path components
        joined onto self.context.pycsw_home. The file is parsed with
        self.context.parser and its root element becomes the child of a
        fresh component.

        Returns the components as a list of lxml.etree.Element, in the
        order of self.schemas_paths.
        '''

        def build_component(path_parts):
            ''' Create one component wrapping the schema at path_parts '''
            component = etree.Element(
                util.nspath_eval('csw:SchemaComponent',
                                 self.context.namespaces),
                schemaLanguage='XMLSCHEMA',
                targetNamespace=self.namespace)

            xsd_path = os.path.join(self.context.pycsw_home, *path_parts)
            xsd_root = etree.parse(xsd_path, self.context.parser).getroot()
            component.append(xsd_root)
            return component

        return [build_component(parts) for parts in self.schemas_paths]
Beispiel #12
0
def load_records(context,
                 database,
                 table,
                 xml_dirpath,
                 recursive=False,
                 force_update=False):
    """Import XML metadata files into a repository table.

    xml_dirpath is either one file or a directory. A directory is scanned
    for '*.xml' files, descending into sub-directories only when recursive
    is true. Files are processed in sorted order.

    Files that fail to parse, or that metadata.parse_record rejects, are
    logged with a traceback and skipped. When repo.insert raises, the
    record is handed to repo.update if force_update is true; otherwise the
    error is logged with a traceback.

    Returns a tuple of the file paths for which at least one record was
    inserted or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    imported = set()

    if os.path.isfile(xml_dirpath):
        candidates = [xml_dirpath]
    elif recursive:
        candidates = [os.path.join(folder, name)
                      for folder, _subdirs, names in os.walk(xml_dirpath)
                      for name in names
                      if name.endswith('.xml')]
    else:
        candidates = list(glob(os.path.join(xml_dirpath, '*.xml')))

    total = len(candidates)

    for position, recfile in enumerate(sorted(candidates), 1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, position, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed',
                         recfile,
                         exc_info=True)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            parsed = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in parsed:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                imported.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if not force_update:
                    # Pull a decent database error message and not the full
                    # SQL that was run, since INSERT SQL statements are
                    # rather large.
                    if isinstance(err, DBAPIError) and err.args:
                        detail = err.args[0]
                    else:
                        detail = err
                    LOGGER.error('ERROR: %s not inserted: %s',
                                 recfile,
                                 detail,
                                 exc_info=True)
                else:
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated %s', recfile)
                    imported.add(recfile)

    return tuple(imported)
Beispiel #13
0
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Import XML metadata files into a repository table.

    xml_dirpath is either one file or a directory. A directory is scanned
    for '*.xml' files, descending into sub-directories only when recursive
    is true. Files are processed in sorted order.

    Files that fail to parse, or that metadata.parse_record rejects, are
    logged and skipped. When repo.insert raises, the record is handed to
    repo.update if force_update is true; otherwise the error is logged.

    Returns a tuple of the file paths for which at least one record was
    inserted or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    done = set()

    if os.path.isfile(xml_dirpath):
        pending = [xml_dirpath]
    elif recursive:
        pending = []
        for folder, _subdirs, names in os.walk(xml_dirpath):
            pending.extend(os.path.join(folder, name) for name in names if name.endswith('.xml'))
    else:
        pending = list(glob(os.path.join(xml_dirpath, '*.xml')))

    total = len(pending)

    for index, recfile in enumerate(sorted(pending), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, index, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed', recfile)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            records = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in records:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                done.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if force_update:
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated %s', recfile)
                    done.add(recfile)
                else:
                    # Pull a decent database error message and not the full SQL that was run
                    # since INSERT SQL statements are rather large.
                    use_first_arg = isinstance(err, DBAPIError) and err.args
                    reason = err.args[0] if use_first_arg else err
                    LOGGER.error('ERROR: %s not inserted: %s', recfile, reason)

    return tuple(done)
Beispiel #14
0
            elif os.path.isdir(xml_dirpath):
                files_names = [
                    os.path.join(xml_dirpath, f)
                    for f in os.listdir(xml_dirpath)
                ]
            else:
                print('Undefined xml files path in command line input')
                sys.exit(1)
            if not catalog_slug:
                print('Undefined catalog slug in command line input')
                sys.exit(1)

            # Create repository object with catalog slug.
            context = config.StaticContext()
            repo = RegistryRepository(PYCSW['repository']['database'],
                                      context,
                                      table=PYCSW['repository']['table'])
            repo.catalog = catalog_slug

            # Create index with mapping in Elasticsarch.
            create_index(catalog_slug)

            # Parse each xml file and insert records.
            for xml_file in files_names:
                parsed_xml = etree.parse(xml_file, context.parser)
                load_records(repo, parsed_xml, context)

        sys.exit(0)

    management.execute_from_command_line()