Exemple #1
0
def refresh_harvested_records(context, database, table, url):
    """Refresh / harvest all non-local records in repository.

    Queries the repository for every record whose ``source`` is not
    ``"local"`` and asks the CSW client built from ``url`` to harvest each
    of them again.  A failure while harvesting one record is logged as a
    warning and does not stop the remaining records from being processed.

    :param context: object exposing ``md_core_model['mappings']``, used to
        resolve the record attribute names for source, schema and identifier
    :param database: passed through to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL handed to ``CatalogueServiceWeb``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) > 0:
        LOGGER.info('Refreshing %s harvested records', count)
        csw = CatalogueServiceWeb(url)

        # the attribute-name mappings do not change between records
        mappings = context.md_core_model['mappings']

        for rec in records:
            source = getattr(rec, mappings['pycsw:Source'])
            schema = getattr(rec, mappings['pycsw:Schema'])
            identifier = getattr(rec, mappings['pycsw:Identifier'])

            LOGGER.info('Harvesting %s (identifier = %s) ...',
                        source, identifier)
            # TODO: find a smarter way of catching this
            if schema == 'http://www.isotc211.org/2005/gmd':
                schema = 'http://www.isotc211.org/schemas/2005/gmd/'
            try:
                csw.harvest(source, schema)
                LOGGER.info(csw.response)
            except Exception as err:  # best effort: keep going on failure
                LOGGER.warning(err)
    else:
        LOGGER.info('No harvested records')
Exemple #2
0
def refresh_harvested_records(context, database, table, url):
    """Refresh / harvest all non-local records in repository.

    Looks up every repository record whose ``source`` differs from
    ``"local"`` and re-submits it to the CSW client created from ``url``
    via a Harvest call.  Errors raised for a single record are logged as
    warnings so the loop can continue with the next record.

    :param context: object exposing ``md_core_model['mappings']``, used to
        resolve the record attribute names for source, schema and identifier
    :param database: passed through to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL handed to ``CatalogueServiceWeb``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) > 0:
        LOGGER.info('Refreshing %s harvested records', count)
        csw = CatalogueServiceWeb(url)

        # resolve the mapping table once instead of once per attribute
        mappings = context.md_core_model['mappings']

        for rec in records:
            source = getattr(rec, mappings['pycsw:Source'])
            schema = getattr(rec, mappings['pycsw:Schema'])
            identifier = getattr(rec, mappings['pycsw:Identifier'])

            LOGGER.info('Harvesting %s (identifier = %s) ...', source,
                        identifier)
            # TODO: find a smarter way of catching this
            if schema == 'http://www.isotc211.org/2005/gmd':
                schema = 'http://www.isotc211.org/schemas/2005/gmd/'
            try:
                csw.harvest(source, schema)
                LOGGER.info(csw.response)
            except Exception as err:  # best effort: keep going on failure
                LOGGER.warning(err)
    else:
        LOGGER.info('No harvested records')
Exemple #3
0
def refresh_harvested_records(context, database, table, url):
    """Refresh / harvest all non-local records in repository.

    Selects every repository record whose ``mdsource`` is not ``"local"``
    and asks the CSW client built from ``url`` to harvest it again.  When
    the query matches nothing, an informational message is logged instead.
    An exception raised for one record is logged as a warning and the loop
    moves on to the next record.

    :param context: object exposing ``md_core_model["mappings"]``, used to
        resolve the record attribute names for source, schema and identifier
    :param database: passed through to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL handed to ``CatalogueServiceWeb``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={"where": 'mdsource != "local"', "values": []})

    if int(count) > 0:
        LOGGER.info("Refreshing %s harvested records", count)
        csw = CatalogueServiceWeb(url)

        # the attribute-name mappings do not change between records
        mappings = context.md_core_model["mappings"]

        for rec in records:
            source = getattr(rec, mappings["pycsw:Source"])
            schema = getattr(rec, mappings["pycsw:Schema"])
            identifier = getattr(rec, mappings["pycsw:Identifier"])

            LOGGER.info("Harvesting %s (identifier = %s) ...", source, identifier)
            # TODO: find a smarter way of catching this
            if schema == "http://www.isotc211.org/2005/gmd":
                schema = "http://www.isotc211.org/schemas/2005/gmd/"
            try:
                csw.harvest(source, schema)
                LOGGER.info(csw.response)
            except Exception as err:  # best effort: keep going on failure
                # Logger.warn is a deprecated alias; warning() is the supported name
                LOGGER.warning(err)
    else:
        LOGGER.info("No harvested records")
Exemple #4
0
def refresh_harvested_records(database, table, url):
    ''' refresh / harvest all non-local records in repository

    Queries the repository (using the module-level ``CONTEXT``) for every
    record whose ``source`` is not ``"local"`` and asks the CSW client
    built from ``url`` to harvest each of them again, printing progress,
    the service response and any per-record error to standard output.

    :param database: passed through to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: URL handed to ``CatalogueServiceWeb``
    '''
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, CONTEXT, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) > 0:
        # single-argument print(...) emits the same text on Python 2 and 3
        print('Refreshing %s harvested records' % count)
        csw = CatalogueServiceWeb(url)

        # the attribute-name mappings do not change between records
        mappings = CONTEXT.md_core_model['mappings']

        for rec in records:
            source = getattr(rec, mappings['pycsw:Source'])
            schema = getattr(rec, mappings['pycsw:Schema'])
            identifier = getattr(rec, mappings['pycsw:Identifier'])

            print('Harvesting %s (identifier = %s) ...' %
                  (source, identifier))
            # TODO: find a smarter way of catching this
            if schema == 'http://www.isotc211.org/2005/gmd':
                schema = 'http://www.isotc211.org/schemas/2005/gmd/'
            try:
                csw.harvest(source, schema)
                print(csw.response)
            except Exception as err:  # best effort: keep going on failure
                print(err)
0
def harvest(source, dst):
    """Harvest every record of the CSW at ``source`` into the CSW at ``dst``.

    Pages through ``source`` with brief GetRecords requests and, for each
    record identifier returned, asks ``dst`` to harvest the corresponding
    GetRecordById URL with resource type
    ``http://www.isotc211.org/2005/gmd``.

    The page size comes from the module-level ``options["max"]``; a value
    of ``0`` or ``None`` falls back to 10.

    :param source: URL of the CSW to read records from
    :param dst: URL of the CSW that performs the Harvest requests
    """
    maxrecords = options["max"]
    # the original test `== 0 or None` never matched None
    if maxrecords in (0, None):
        maxrecords = 10

    src = CatalogueServiceWeb(source)
    dest = CatalogueServiceWeb(dst)

    startposition = 0  # first run, start from 0
    while True:
        src.getrecords(esn="brief",
                       startposition=startposition,
                       maxrecords=maxrecords)

        print(src.results)

        # Harvest this page *before* testing for exhaustion: the page that
        # reports the end of the result set still carries records, and
        # checking first would drop them (or everything, when all matches
        # fit in a single page).
        for identifier in src.records:
            # Build the URL from the `source` argument (not sys.argv) and
            # keep it in its own name so `source` is not overwritten.
            record_url = "%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s" % (
                source,
                identifier,
            )
            dest.harvest(source=record_url,
                         resourcetype="http://www.isotc211.org/2005/gmd")

        nextrecord = src.results["nextrecord"]
        if (nextrecord == 0 or src.results["returned"] == 0
                or nextrecord > src.results["matches"]):
            # end the loop, exhausted all records
            break

        # subsequent run, startposition is now paged
        startposition = nextrecord
Exemple #6
0
# optional third command-line argument overrides the page size
if len(sys.argv) == 4:
    maxrecords = sys.argv[3]

# page through the source CSW, harvesting every returned record
while stop == 0:
    if flag == 0:  # first run, start from 0
        startposition = 0
    else:  # subsequent run, startposition is now paged
        startposition = src.results['nextrecord']

    src.getrecords(esn='brief', startposition=startposition, maxrecords=maxrecords)

    # print(...) with a single argument behaves the same on Python 2 and 3
    print(src.results)

    # harvest each record to destination CSW; this runs before the
    # exhaustion test so the records of the final page are not skipped
    for i in list(src.records):
        source = '%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s' % \
            (sys.argv[1], i)
        dest.harvest(source=source,
                     resourcetype='http://www.isotc211.org/2005/gmd')
        #print dest.request
        #print dest.response

    if src.results['nextrecord'] == 0 \
        or src.results['returned'] == 0 \
        or src.results['nextrecord'] > src.results['matches']:  # end the loop, exhausted all records
        stop = 1
        break

    flag = 1
Exemple #7
0
# keyword arguments shared by every "set dc:title" update below
_title_update = {
    'ttype': 'update',
    'typename': 'csw:Record',
    'propertyname': 'dc:title',
    'propertyvalue': 'New Title',
}
# keyword arguments shared by every delete below
_metadata_delete = {'ttype': 'delete', 'typename': 'gmd:MD_Metadata'}

# filters reused by the update and delete examples
_keywords_filter = ['birds', 'fowl']
_bbox_filter = [-141, 42, -52, 84]

# update with no keywords / bbox filter
csw.transaction(**_title_update)
# update records satisfying keywords filter
csw.transaction(keywords=list(_keywords_filter), **_title_update)
# update records satisfying BBOX filter
csw.transaction(bbox=list(_bbox_filter), **_title_update)

# delete ALL records
csw.transaction(**_metadata_delete)
# delete records satisfying keywords filter
csw.transaction(keywords=list(_keywords_filter), **_metadata_delete)
# delete records satisfying BBOX filter
csw.transaction(bbox=list(_bbox_filter), **_metadata_delete)

# Harvest a resource
csw.harvest('http://host/url.xml', 'http://www.isotc211.org/2005/gmd')
Exemple #8
0
# optional third command-line argument overrides the page size
if len(sys.argv) == 4:
    maxrecords = sys.argv[3]

# page through the source CSW, harvesting every returned record
while stop == 0:
    if flag == 0:  # first run, start from 0
        startposition = 0
    else:  # subsequent run, startposition is now paged
        startposition = src.results["nextrecord"]

    src.getrecords(esn="brief", startposition=startposition, maxrecords=maxrecords)

    print(src.results)

    # harvest each record to destination CSW; this runs before the
    # exhaustion test so the records of the final page are not skipped
    for i in list(src.records):
        source = "%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s" % (sys.argv[1], i)
        dest.harvest(source=source, resourcetype="http://www.isotc211.org/2005/gmd")
        # print dest.request
        # print dest.response

    if (
        src.results["nextrecord"] == 0
        or src.results["returned"] == 0
        or src.results["nextrecord"] > src.results["matches"]
    ):  # end the loop, exhausted all records
        stop = 1
        break

    flag = 1
Exemple #9
0
def harvest_csw_base(src_url, dest_url, resourcetype='http://www.opengis.net/cat/csw/2.0.2'):
    """Ask the CSW at ``dest_url`` to harvest the resource at ``src_url``.

    :param src_url: URL passed as ``source`` of the Harvest request
    :param dest_url: URL of the CSW that performs the Harvest request
    :param resourcetype: resource type sent with the Harvest request
    """
    # A client for the source is still created although nothing reads it
    # afterwards; presumably this checks that the source endpoint
    # answers -- TODO confirm before removing.
    CatalogueServiceWeb(src_url)

    destination = CatalogueServiceWeb(dest_url)
    destination.harvest(source=src_url, resourcetype=resourcetype)