Esempio n. 1
0
def get_record(context, repo, ckan_url, ckan_id, ckan_info):
    """Download a CKAN harvest object and parse it into a metadata record.

    The harvest object is requested from
    ``<ckan_url>harvest/object/<harvest_object_id>`` and the response body is
    parsed as XML and handed to ``metadata.parse_record``; only the first
    record it returns is used.

    Args:
        context: Passed through to ``metadata.parse_record``.
        repo: Passed through to ``metadata.parse_record``.
        ckan_url: Base URL; ``harvest/object/<id>`` is appended verbatim.
        ckan_id: CKAN identifier. Used in log messages, stored on the record
            as ``ckan_id`` and used as the record identifier when the parsed
            record has none.
        ckan_info: Mapping providing the ``harvest_object_id``, ``source``
            and ``metadata_modified`` keys.

    Returns:
        The parsed record, or ``None`` when the source is ``"arcgis"`` or
        when the response cannot be parsed as XML or as a metadata record.
    """
    query = ckan_url + "harvest/object/%s"
    url = query % ckan_info["harvest_object_id"]

    # Decide whether the source is skipped *before* touching the network, so
    # no harvest object is downloaded only to be thrown away.
    if ckan_info["source"] == "arcgis":
        return None

    response = requests.get(url)

    try:
        xml = etree.parse(io.BytesIO(response.content))
    except Exception as err:
        log.error("Could not pass xml doc from %s, Error: %s", ckan_id, err)
        return None

    try:
        record = metadata.parse_record(context, xml, repo)[0]
    except Exception as err:
        log.error("Could not extract metadata from %s, Error: %s",
                  ckan_id, err)
        return None

    # Fall back to the CKAN id when the document carries no identifier.
    if not record.identifier:
        record.identifier = ckan_id
    record.ckan_id = ckan_id
    record.ckan_modified = ckan_info["metadata_modified"]

    return record
Esempio n. 2
0
    def _parse_and_upsert_metadata(self, md: str):
        """Parse an XML metadata document and store it in the repository.

        The document is parsed with ``etree.fromstring`` and converted with
        ``metadata.parse_record`` (only the first record returned is used).
        The record is updated when ``self.repo.query_ids`` returns a truthy
        result for its identifier, and inserted otherwise.

        Args:
            md: XML document handed to ``etree.fromstring``.

        Raises:
            Exception: Any error raised while parsing, updating or inserting
                is logged and then re-raised unchanged.
        """
        logger.debug('Parsing XML')
        try:
            tree = etree.fromstring(md)
        except Exception as err:
            logger.error(f'XML parsing failed: {err}')
            raise

        logger.debug('Processing metadata')
        try:
            record = metadata.parse_record(self.context, tree, self.repo)[0]
            # Store the XML as text rather than bytes.
            record.xml = record.xml.decode()
            logger.info(f"identifier: {record.identifier}")
        except Exception as err:
            logger.error(f'Metadata parsing failed: {err}')
            raise

        already_stored = self.repo.query_ids([record.identifier])

        if not already_stored:
            logger.info('Inserting record')
            try:
                self.repo.insert(record, 'local', util.get_today_and_now())
                logger.info('record inserted')
            except Exception as err:
                logger.error(f'record insertion failed: {err}')
                raise
            return

        logger.info('Updating record')
        try:
            self.repo.update(record)
            logger.info('record updated')
        except Exception as err:
            logger.error(f'record update failed: {err}')
            raise
Esempio n. 3
0
def load_records(repo, parsed_xml, context):
    """Insert the records held in the first ``csw:Insert`` element.

    Every child element of the first ``csw:Insert`` found in ``parsed_xml``
    is converted with ``metadata.parse_record`` (first returned record only)
    and then inserted into ``repo`` with source ``'local'`` and the record's
    own ``insert_date``.

    Args:
        repo: Repository receiving the records; also passed to
            ``metadata.parse_record``.
        parsed_xml: Parsed XML object exposing ``xpath``.
        context: Supplies ``namespaces`` for the XPath lookup and is passed
            to ``metadata.parse_record``.

    Raises:
        IndexError: If ``parsed_xml`` contains no ``csw:Insert`` element
            (assuming ``xpath`` returns a list, as lxml does).
    """
    insert_element = parsed_xml.xpath('//csw:Insert',
                                      namespaces=context.namespaces)[0]

    # Parse every child before inserting anything, so that a parsing failure
    # is raised before the repository has been modified.
    parsed_records = [
        metadata.parse_record(context, child, repo)[0]
        for child in insert_element.xpath('child::*')
    ]

    # Plain loop: the inserts are side effects, not values worth collecting.
    for record in parsed_records:
        repo.insert(record, 'local', record.insert_date)
Esempio n. 4
0
def load_records(context,
                 database,
                 table,
                 xml_dirpath,
                 recursive=False,
                 force_update=False):
    """Load metadata records from a file or directory of XML files.

    Args:
        context: Supplies the ``parser`` used to read each file; also passed
            to ``repository.Repository`` and ``metadata.parse_record``.
        database: Passed to ``repository.Repository``; also used in log
            messages.
        table: Repository table name; also used in log messages.
        xml_dirpath: Path of a single file (used as is) or of a directory
            whose ``*.xml`` files are loaded.
        recursive: When true, subdirectories of ``xml_dirpath`` are searched
            as well.
        force_update: When true, a record whose insert raises
            ``RuntimeError`` is updated instead of being skipped.

    Files that cannot be parsed as XML are logged and skipped. Errors from
    ``metadata.parse_record`` and ``repo.update`` are not caught here.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [
            os.path.join(root, mfile)
            for root, _dirs, files in os.walk(xml_dirpath)
            for mfile in files
            if mfile.endswith('.xml')
        ]
    else:
        file_list = list(glob(os.path.join(xml_dirpath, '*.xml')))

    total = len(file_list)

    for counter, recfile in enumerate(sorted(file_list), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, counter, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning.
            LOGGER.warning('XML document is not well-formed: %s', str(err))
            continue

        for rec in metadata.parse_record(context, exml, repo):
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                LOGGER.info('Inserted')
            except RuntimeError as err:
                if force_update:
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated')
                else:
                    LOGGER.warning('ERROR: not inserted %s', err)
Esempio n. 5
0
File: admin.py Progetto: bukun/pycsw
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Load metadata records from a file or directory of XML files.

    Args:
        context: Supplies the ``parser`` used to read each file; also passed
            to ``repository.Repository`` and ``metadata.parse_record``.
        database: Passed to ``repository.Repository``; also used in log
            messages.
        table: Repository table name; also used in log messages.
        xml_dirpath: Path of a single file (used as is) or of a directory
            whose ``*.xml`` files are loaded.
        recursive: When true, subdirectories of ``xml_dirpath`` are searched
            as well.
        force_update: When true, a record whose insert raises
            ``RuntimeError`` is updated instead of being skipped.

    Files that cannot be parsed as XML are logged and skipped. Errors from
    ``metadata.parse_record`` and ``repo.update`` are not caught here.
    """
    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        file_list = [xml_dirpath]
    elif recursive:
        file_list = [
            os.path.join(root, mfile)
            for root, _dirs, files in os.walk(xml_dirpath)
            for mfile in files
            if mfile.endswith(".xml")
        ]
    else:
        file_list = list(glob(os.path.join(xml_dirpath, "*.xml")))

    total = len(file_list)

    for counter, recfile in enumerate(sorted(file_list), start=1):
        LOGGER.info("Processing file %s (%d of %d)", recfile, counter, total)
        # read document
        try:
            exml = etree.parse(recfile, context.parser)
        except Exception as err:
            # Logger.warn is a deprecated alias; use Logger.warning.
            LOGGER.warning("XML document is not well-formed: %s", str(err))
            continue

        for rec in metadata.parse_record(context, exml, repo):
            LOGGER.info(
                "Inserting %s %s into database %s, table %s ....", rec.typename, rec.identifier, database, table
            )

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, "local", util.get_today_and_now())
                LOGGER.info("Inserted")
            except RuntimeError as err:
                if force_update:
                    LOGGER.info("Record exists. Updating.")
                    repo.update(rec)
                    LOGGER.info("Updated")
                else:
                    LOGGER.warning("ERROR: not inserted %s", err)
Esempio n. 6
0
def load_records(context,
                 database,
                 table,
                 xml_dirpath,
                 recursive=False,
                 force_update=False):
    """Insert the metadata records found in XML files into a repository table.

    ``xml_dirpath`` may name a single file, which is used as is, or a
    directory whose ``*.xml`` files are collected (including those in
    subdirectories when ``recursive`` is true). Files are handled in sorted
    order. A file that cannot be read as XML, or whose content cannot be
    turned into records, is logged and skipped.

    When inserting a record fails and ``force_update`` is true, the record is
    updated instead; otherwise the failure is logged and processing goes on.

    Returns:
        A tuple of the file paths for which at least one record was inserted
        or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        candidates = [xml_dirpath]
    elif recursive:
        candidates = [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk(xml_dirpath)
            for name in names
            if name.endswith('.xml')
        ]
    else:
        candidates = list(glob(os.path.join(xml_dirpath, '*.xml')))

    loaded_files = set()
    total = len(candidates)

    for position, recfile in enumerate(sorted(candidates), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, position, total)

        # Read the document.
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed',
                         recfile,
                         exc_info=True)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            records = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in records:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                loaded_files.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if not force_update:
                    # For database errors report only the driver message, not
                    # the full SQL that was run: INSERT statements are large.
                    if isinstance(err, DBAPIError) and err.args:
                        reason = err.args[0]
                    else:
                        reason = err
                    LOGGER.error('ERROR: %s not inserted: %s',
                                 recfile,
                                 reason,
                                 exc_info=True)
                    continue

                LOGGER.info('Record exists. Updating.')
                repo.update(rec)
                LOGGER.info('Updated %s', recfile)
                loaded_files.add(recfile)

    return tuple(loaded_files)
Esempio n. 7
0
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
    """Insert the metadata records found in XML files into a repository table.

    ``xml_dirpath`` is either a single file, which is used as is, or a
    directory whose ``*.xml`` files are gathered (descending into
    subdirectories when ``recursive`` is true). Files are processed in sorted
    order; a file that is not readable as XML or cannot be turned into
    records is logged and skipped.

    If inserting a record fails and ``force_update`` is true, the record is
    updated instead; otherwise the failure is logged and the loop carries on.

    Returns:
        A tuple of the file paths for which at least one record was inserted
        or updated.
    """
    from sqlalchemy.exc import DBAPIError

    repo = repository.Repository(database, context, table=table)

    if os.path.isfile(xml_dirpath):
        xml_paths = [xml_dirpath]
    elif recursive:
        xml_paths = []
        for folder, _subdirs, names in os.walk(xml_dirpath):
            xml_paths.extend(os.path.join(folder, name) for name in names if name.endswith('.xml'))
    else:
        xml_paths = list(glob(os.path.join(xml_dirpath, '*.xml')))

    loaded_files = set()
    total = len(xml_paths)

    for index, recfile in enumerate(sorted(xml_paths), start=1):
        LOGGER.info('Processing file %s (%d of %d)', recfile, index, total)

        # Read the document; syntax errors are reported without a traceback.
        try:
            exml = etree.parse(recfile, context.parser)
        except etree.XMLSyntaxError:
            LOGGER.error('XML document "%s" is not well-formed', recfile)
            continue
        except Exception:
            LOGGER.exception('XML document "%s" is not well-formed', recfile)
            continue

        try:
            records = metadata.parse_record(context, exml, repo)
        except Exception:
            LOGGER.exception('Could not parse "%s" as an XML record', recfile)
            continue

        for rec in records:
            LOGGER.info('Inserting %s %s into database %s, table %s ....',
                        rec.typename, rec.identifier, database, table)

            # TODO: do this as CSW Harvest
            try:
                repo.insert(rec, 'local', util.get_today_and_now())
                loaded_files.add(recfile)
                LOGGER.info('Inserted %s', recfile)
            except Exception as err:
                if force_update:
                    LOGGER.info('Record exists. Updating.')
                    repo.update(rec)
                    LOGGER.info('Updated %s', recfile)
                    loaded_files.add(recfile)
                    continue

                # For database errors report only the driver message, not the
                # full SQL that was run: INSERT statements are rather large.
                use_short_message = isinstance(err, DBAPIError) and err.args
                detail = err.args[0] if use_short_message else err
                LOGGER.error('ERROR: %s not inserted: %s', recfile, detail)

    return tuple(loaded_files)
Esempio n. 8
0
def get_record(context, repo, ckan_url, ckan_id, ckan_info):
    """Download a CKAN harvest object and parse it into a metadata record.

    The harvest object is requested from
    ``<ckan_url>harvest/object/<harvest_object_id>`` and the response body is
    parsed as XML and handed to ``metadata.parse_record``; only the first
    record it returns is used.

    Args:
        context: Passed through to ``metadata.parse_record``.
        repo: Passed through to ``metadata.parse_record``.
        ckan_url: Base URL; ``harvest/object/<id>`` is appended verbatim.
        ckan_id: CKAN identifier. Used in log messages, stored on the record
            as ``ckan_id`` and used as the record identifier when the parsed
            record has none.
        ckan_info: Mapping providing the ``harvest_object_id``, ``source``
            and ``metadata_modified`` keys.

    Returns:
        The parsed record, or ``None`` when the source is ``'arcgis'`` or
        when the response cannot be parsed as XML or as a metadata record.
    """
    query = ckan_url + 'harvest/object/%s'
    url = query % ckan_info['harvest_object_id']

    # Decide whether the source is skipped *before* touching the network, so
    # no harvest object is downloaded only to be thrown away.
    if ckan_info['source'] == 'arcgis':
        return None

    response = requests.get(url)

    # ``except ... as err`` is valid on Python 2.6+ and Python 3, unlike the
    # comma form, which is a syntax error on Python 3.
    try:
        xml = etree.parse(io.BytesIO(response.content))
    except Exception as err:
        log.error('Could not pass xml doc from %s, Error: %s', ckan_id, err)
        return None

    try:
        record = metadata.parse_record(context, xml, repo)[0]
    except Exception as err:
        log.error('Could not extract metadata from %s, Error: %s',
                  ckan_id, err)
        return None

    # Fall back to the CKAN id when the document carries no identifier.
    if not record.identifier:
        record.identifier = ckan_id
    record.ckan_id = ckan_id
    record.ckan_modified = ckan_info['metadata_modified']

    return record


usage = '''
Manages the CKAN-pycsw integration