def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    Queries the repository for every record whose source is not "local" and
    asks the CSW at ``url`` to harvest each of them again.  A failure on one
    record is logged as a warning and the loop moves on to the next record.

    :param context: object whose ``md_core_model['mappings']`` maps
                    ``pycsw:Source``, ``pycsw:Schema`` and
                    ``pycsw:Identifier`` to record attribute names
    :param database: passed as-is to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: endpoint handed to ``CatalogueServiceWeb``
    :returns: ``None``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) <= 0:
        return

    LOGGER.info('Refreshing %s harvested records', count)
    csw = CatalogueServiceWeb(url)

    # the attribute names do not change between records: resolve them once
    mappings = context.md_core_model['mappings']
    source_attr = mappings['pycsw:Source']
    schema_attr = mappings['pycsw:Schema']
    identifier_attr = mappings['pycsw:Identifier']

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        LOGGER.info('Harvesting %s (identifier = %s) ...',
                    source, identifier)

        # TODO: find a smarter way of catching this
        if schema == 'http://www.isotc211.org/2005/gmd':
            schema = 'http://www.isotc211.org/schemas/2005/gmd/'

        try:
            csw.harvest(source, schema)
            LOGGER.info(csw.response)
        except Exception as err:
            # best effort: one failing source must not abort the whole
            # refresh, so report it and continue
            LOGGER.warning(err)
def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    Looks up every repository record whose source differs from "local" and
    issues a harvest request for it against the CSW at ``url``.  Errors raised
    while harvesting a single record are logged as warnings; the remaining
    records are still processed.

    :param context: object whose ``md_core_model['mappings']`` maps
                    ``pycsw:Source``, ``pycsw:Schema`` and
                    ``pycsw:Identifier`` to record attribute names
    :param database: passed as-is to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: endpoint handed to ``CatalogueServiceWeb``
    :returns: ``None``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) > 0:
        LOGGER.info('Refreshing %s harvested records', count)
        csw = CatalogueServiceWeb(url)

        # loop-invariant: which record attributes hold source/schema/id
        mappings = context.md_core_model['mappings']
        source_attr = mappings['pycsw:Source']
        schema_attr = mappings['pycsw:Schema']
        identifier_attr = mappings['pycsw:Identifier']

        for rec in records:
            source = getattr(rec, source_attr)
            schema = getattr(rec, schema_attr)
            identifier = getattr(rec, identifier_attr)

            LOGGER.info('Harvesting %s (identifier = %s) ...',
                        source, identifier)

            # TODO: find a smarter way of catching this
            if schema == 'http://www.isotc211.org/2005/gmd':
                schema = 'http://www.isotc211.org/schemas/2005/gmd/'

            try:
                csw.harvest(source, schema)
                LOGGER.info(csw.response)
            except Exception as err:
                # keep going with the next record; only report the failure
                LOGGER.warning(err)
def refresh_harvested_records(context, database, table, url):
    """refresh / harvest all non-local records in repository

    Queries the repository for records whose ``mdsource`` is not "local" and
    asks the CSW at ``url`` to harvest each of them again.  When the query
    matches nothing, that fact is logged and nothing else happens.  A failure
    on one record is logged as a warning and does not stop the loop.

    :param context: object whose ``md_core_model["mappings"]`` maps
                    ``pycsw:Source``, ``pycsw:Schema`` and
                    ``pycsw:Identifier`` to record attribute names
    :param database: passed as-is to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: endpoint handed to ``CatalogueServiceWeb``
    :returns: ``None``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, context, table=table)

    # get all harvested records
    count, records = repos.query(
        constraint={"where": 'mdsource != "local"', "values": []}
    )

    if int(count) <= 0:
        LOGGER.info("No harvested records")
        return

    LOGGER.info("Refreshing %s harvested records", count)
    csw = CatalogueServiceWeb(url)

    # the attribute names do not change between records: resolve them once
    mappings = context.md_core_model["mappings"]
    source_attr = mappings["pycsw:Source"]
    schema_attr = mappings["pycsw:Schema"]
    identifier_attr = mappings["pycsw:Identifier"]

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        LOGGER.info("Harvesting %s (identifier = %s) ...", source, identifier)

        # TODO: find a smarter way of catching this
        if schema == "http://www.isotc211.org/2005/gmd":
            schema = "http://www.isotc211.org/schemas/2005/gmd/"

        try:
            csw.harvest(source, schema)
            LOGGER.info(csw.response)
        except Exception as err:
            # best effort: report the failure and continue with the next
            # record (Logger.warn is a deprecated alias of Logger.warning)
            LOGGER.warning(err)
def refresh_harvested_records(database, table, url):
    """refresh / harvest all non-local records in repository

    Uses the module-level ``CONTEXT`` to open the repository, selects every
    record whose source is not "local" and asks the CSW at ``url`` to harvest
    each of them again.  Progress, server responses and errors are printed to
    standard output; an error on one record does not stop the loop.

    :param database: passed as-is to ``repository.Repository``
    :param table: passed to ``repository.Repository`` as ``table``
    :param url: endpoint handed to ``CatalogueServiceWeb``
    :returns: ``None``
    """
    from owslib.csw import CatalogueServiceWeb

    # get configuration and init repo connection
    repos = repository.Repository(database, CONTEXT, table=table)

    # get all harvested records
    count, records = repos.query(constraint={'where': 'source != "local"'})

    if int(count) <= 0:
        return

    # print(...) with a single pre-formatted string yields the same output
    # under Python 2 and Python 3
    print('Refreshing %s harvested records' % count)
    csw = CatalogueServiceWeb(url)

    # the attribute names do not change between records: resolve them once
    mappings = CONTEXT.md_core_model['mappings']
    source_attr = mappings['pycsw:Source']
    schema_attr = mappings['pycsw:Schema']
    identifier_attr = mappings['pycsw:Identifier']

    for rec in records:
        source = getattr(rec, source_attr)
        schema = getattr(rec, schema_attr)
        identifier = getattr(rec, identifier_attr)

        print('Harvesting %s (identifier = %s) ...' % (source, identifier))

        # TODO: find a smarter way of catching this
        if schema == 'http://www.isotc211.org/2005/gmd':
            schema = 'http://www.isotc211.org/schemas/2005/gmd/'

        try:
            csw.harvest(source, schema)
            print(csw.response)
        except Exception as err:
            # best effort: show the error and continue with the next record
            print(err)
def harvest(source, dst):
    """Page through the CSW at ``source`` and harvest every record into ``dst``.

    Records are fetched from ``source`` in "brief" form, one page at a time.
    For each record identifier a GetRecordById URL against ``source`` is built
    and submitted to ``dst`` as a Harvest request with the
    ``http://www.isotc211.org/2005/gmd`` resource type.  The paging results of
    every request are printed.

    The page size comes from the module-level ``options["max"]``; when that is
    ``0``, ``None`` or otherwise falsy, 10 is used.

    :param source: URL of the CSW to read records from
    :param dst: URL of the CSW that receives the Harvest requests
    :returns: ``None``
    """
    # `x == 0 or None` never matched None; `or` covers both 0 and None
    maxrecords = options["max"] or 10

    src = CatalogueServiceWeb(source)
    dest = CatalogueServiceWeb(dst)

    # first run, start from 0
    startposition = 0

    while True:
        src.getrecords(esn="brief", startposition=startposition,
                       maxrecords=maxrecords)
        print(src.results)

        # harvest each record to destination CSW.  This happens before the
        # termination check because the page that reports exhaustion
        # (nextrecord == 0 or nextrecord > matches) can still carry records.
        for identifier in src.records:
            # build the URL from the `source` argument (not sys.argv) and
            # keep it in its own name so the parameter is not overwritten
            record_url = (
                "%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s"
                % (source, identifier)
            )
            dest.harvest(source=record_url,
                         resourcetype="http://www.isotc211.org/2005/gmd")

        results = src.results
        if (results["nextrecord"] == 0
                or results["returned"] == 0
                or results["nextrecord"] > results["matches"]):
            # end the loop, exhausted all records
            break

        # subsequent run, startposition is now paged
        startposition = results["nextrecord"]
# Optional third command-line argument overrides the page size.
if len(sys.argv) == 4:
    maxrecords = sys.argv[3]

# Page through the source CSW (`src`) and submit one Harvest request per
# record to the destination CSW (`dest`).  `stop`, `flag`, `src`, `dest` and
# `maxrecords` are expected to be set up earlier in the script.
while stop == 0:
    if flag == 0:
        # first run, start from 0
        startposition = 0
    else:
        # subsequent run, startposition is now paged
        startposition = src.results['nextrecord']

    src.getrecords(esn='brief', startposition=startposition,
                   maxrecords=maxrecords)

    # single-argument print(...) behaves the same on Python 2 and Python 3
    print(src.results)

    # harvest each record to destination CSW.  This runs before the
    # termination check because the page that reports exhaustion
    # (nextrecord == 0 or nextrecord > matches) can still carry records.
    for i in list(src.records):
        source = '%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s' % \
            (sys.argv[1], i)
        dest.harvest(source=source,
                     resourcetype='http://www.isotc211.org/2005/gmd')
        # print(dest.request)
        # print(dest.response)

    if src.results['nextrecord'] == 0 \
            or src.results['returned'] == 0 \
            or src.results['nextrecord'] > src.results['matches']:
        # end the loop, exhausted all records
        stop = 1
        break

    flag = 1
# update the dc:title property with no keyword or bbox filter supplied
csw.transaction(
    ttype='update',
    typename='csw:Record',
    propertyname='dc:title',
    propertyvalue='New Title',
)

# same update, limited by a keywords filter
csw.transaction(
    ttype='update',
    typename='csw:Record',
    propertyname='dc:title',
    propertyvalue='New Title',
    keywords=['birds', 'fowl'],
)

# same update, limited by a BBOX filter
csw.transaction(
    ttype='update',
    typename='csw:Record',
    propertyname='dc:title',
    propertyvalue='New Title',
    bbox=[-141, 42, -52, 84],
)

# delete ALL records
csw.transaction(
    ttype='delete',
    typename='gmd:MD_Metadata',
)

# delete, limited by a keywords filter
csw.transaction(
    ttype='delete',
    typename='gmd:MD_Metadata',
    keywords=['birds', 'fowl'],
)

# delete, limited by a BBOX filter
csw.transaction(
    ttype='delete',
    typename='gmd:MD_Metadata',
    bbox=[-141, 42, -52, 84],
)

# Harvest a resource: first argument is the resource URL, second its type
csw.harvest(
    'http://host/url.xml',
    'http://www.isotc211.org/2005/gmd',
)
# Optional third command-line argument overrides the page size.
if len(sys.argv) == 4:
    maxrecords = sys.argv[3]

# Page through the source CSW (`src`) and submit one Harvest request per
# record to the destination CSW (`dest`).  `stop`, `flag`, `src`, `dest` and
# `maxrecords` are expected to be set up earlier in the script.
while stop == 0:
    if flag == 0:
        # first run, start from 0
        startposition = 0
    else:
        # subsequent run, startposition is now paged
        startposition = src.results["nextrecord"]

    src.getrecords(esn="brief", startposition=startposition, maxrecords=maxrecords)

    print(src.results)

    # harvest each record to destination CSW.  This runs before the
    # termination check because the page that reports exhaustion
    # (nextrecord == 0 or nextrecord > matches) can still carry records.
    for i in list(src.records):
        source = "%s?service=CSW&version=2.0.2&request=GetRecordById&id=%s" % (sys.argv[1], i)
        dest.harvest(source=source, resourcetype="http://www.isotc211.org/2005/gmd")
        # print dest.request
        # print dest.response

    if (
        src.results["nextrecord"] == 0
        or src.results["returned"] == 0
        or src.results["nextrecord"] > src.results["matches"]
    ):
        # end the loop, exhausted all records
        stop = 1
        break

    flag = 1
def harvest_csw_base(src_url, dest_url,
                     resourcetype='http://www.opengis.net/cat/csw/2.0.2'):
    """Ask the CSW at ``dest_url`` to harvest the resource at ``src_url``.

    :param src_url: URL submitted as the ``source`` of the Harvest request
    :param dest_url: URL of the CSW that receives the Harvest request
    :param resourcetype: resource type sent with the request; defaults to
                         ``http://www.opengis.net/cat/csw/2.0.2``
    :returns: ``None``
    """
    # A client for the source endpoint is created first, as before, although
    # nothing below reads it.
    # NOTE(review): presumably this only serves as an early check of the
    # source URL -- confirm before removing.
    source_client = CatalogueServiceWeb(src_url)

    target_client = CatalogueServiceWeb(dest_url)
    target_client.harvest(source=src_url, resourcetype=resourcetype)