def list_metadata_formats(target, identifier):
    """Return the converted metadata formats a target offers for a record.

    ``target`` is subscripted with ``'url'`` to obtain the endpoint handed to
    ``Client``. When ``target`` is ``None`` no request is made and ``None``
    is returned. Otherwise ``listMetadataFormats`` is called for
    ``identifier`` and every entry of the response is passed through
    ``convert_metadata_formats``; a ``None`` response yields an empty list.
    """
    if target is None:
        return None

    # ``registry`` is not a parameter; it is looked up in the module scope.
    oai_client = Client(target['url'], registry)
    formats = oai_client.listMetadataFormats(identifier=identifier)
    if formats is None:
        return []
    return [convert_metadata_formats(entry) for entry in formats]
def get_client(url, transforms):
    """Create an OAI-PMH client for ``url`` with a reader built from ``transforms``.

    ``transforms`` is first normalised by ``fix_transforms``; each resulting
    entry contributes a ``field -> ('textList', path)`` mapping to the reader.
    The metadata formats reported by the server supply the prefix-to-namespace
    map, except that the first reported format is replaced by the hard-coded
    ``fbb`` entry.

    Returns a ``(client, metadata_prefix)`` tuple.
    """
    transforms = fix_transforms(transforms)
    registry = MetadataRegistry()
    client = Client(url, registry)

    formats = client.listMetadataFormats()
    # Overwrite whatever the server lists first with the fbb definition.
    formats[0] = [
        'fbb',
        'http://www.kulturarv.dk/fbb/fbb.xsd',
        'http://www.kulturarv.dk/fbb',
    ]

    # Each format entry is indexed as (prefix, schema, namespace).
    namespaces = {}
    for entry in formats:
        namespaces[entry[0]] = entry[2]

    fields = {}
    for transform in transforms:
        fields[transform['field']] = ('textList', transform['path'])

    prefix = formats[0][0]
    # Same output as ``print namespaces, fields``; parses on Python 2 and 3.
    print('%s %s' % (namespaces, fields))

    reader = MetadataReader(fields=fields, namespaces=namespaces)
    registry.registerReader(prefix, reader)
    return client, prefix
def get_client(url, transforms):
    """Return ``(client, prefix)`` for harvesting ``url`` with ``transforms``.

    The transforms are passed through ``fix_transforms`` and turned into the
    field map of a ``MetadataReader`` (every field is read as a ``textList``
    at the transform's ``path``). The namespace map comes from the server's
    ``listMetadataFormats`` response after its first entry has been replaced
    by the fixed ``fbb`` definition. The reader is registered on the client's
    registry under that first entry's prefix.
    """
    fixed_transforms = fix_transforms(transforms)
    md_registry = MetadataRegistry()
    oai_client = Client(url, md_registry)

    md_formats = oai_client.listMetadataFormats()
    # The first reported format is always replaced by the fbb definition.
    md_formats[0] = ['fbb',
                     'http://www.kulturarv.dk/fbb/fbb.xsd',
                     'http://www.kulturarv.dk/fbb']

    # Format entries are indexed as (prefix, schema, namespace).
    namespaces = {fmt[0]: fmt[2] for fmt in md_formats}
    fields = {t['field']: ('textList', t['path']) for t in fixed_transforms}
    namespace = md_formats[0][0]

    # Same output as ``print namespaces, fields``; parses on Python 2 and 3.
    print('%s %s' % (namespaces, fields))

    md_registry.registerReader(
        namespace, MetadataReader(fields=fields, namespaces=namespaces))
    return oai_client, namespace
def add_provider(cxn, args):
    """Add a new provider to the registry database.

    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).

    The row for the provider is inserted before anything else is validated so
    that a duplicate name is reported without prompting or contacting the
    provider. When a later step fails, that pending INSERT is rolled back
    before returning, so a later commit on the same connection cannot persist
    a provider that has no URL.

    Missing values (``args.url``, ``args.dest``, ``args.metadataPrefix``) are
    requested interactively and written back onto ``args``.

    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    addlogger = logger.getChild('add')

    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1

    # Try to create row now to avoid unnecessary validation if duplicate
    try:
        cxn.execute(
            "INSERT INTO providers(name, lastHarvest) values (?, ?)",
            (args.name, datetime.fromtimestamp(0))
        )
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name))
        return 1
    addlogger.info('Adding provider "{0}"'.format(args.name))

    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
    # Checked whether the URL came from the prompt or from the command line.
    if not args.url:
        addlogger.critical('Base URL for new provider not supplied')
        # Discard the row inserted above; the provider is incomplete.
        cxn.rollback()
        return 1

    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)

    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        # Discard the row inserted above; the provider is unusable.
        cxn.rollback()
        return 1

    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
    if args.dest:
        # Expand user dir
        args.dest = os.path.expanduser(args.dest)
    else:
        addlogger.info('Destination for data for new provider not supplied'
                       ' using default `pwd`: {0}'.format(os.getcwd()))
        args.dest = os.getcwd()

    # metadataPrefix
    # Check that selected metadataPrefix is available from provider
    # Fetch list of available formats, keyed by prefix; the value holds the
    # remaining items of each entry returned by the provider.
    mdps = dict((mdpinfo[0], mdpinfo[1:])
                for mdpinfo in client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        # Parenthesised single-argument prints produce the same output as the
        # Python 2 print statement and also parse on Python 3.
        print("Available metadataPrefix values:")
        # List available formats
        for mdp in mdps:
            print('%s - %s' % (mdp, mdps[mdp][1]))
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied. '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'

    cxn.execute(
        "UPDATE providers SET "
        "url=?, "
        "destination=?, "
        "metadataPrefix=? "
        "WHERE name=?",
        (args.url, args.dest, args.metadataPrefix, args.name)
    )
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url,
                             args.metadataPrefix,
                             datetime.fromtimestamp(0)))
    # All done, commit database
    cxn.commit()
    return 0
def add_provider(cxn, args):
    """Add a new provider to the registry database.

    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).

    The row for the provider is inserted before anything else is validated so
    that a duplicate name is reported without prompting or contacting the
    provider. When a later step fails, that pending INSERT is rolled back
    before returning, so a later commit on the same connection cannot persist
    a provider that has no URL.

    Missing values (``args.url``, ``args.dest``, ``args.metadataPrefix``) are
    requested interactively and written back onto ``args``.

    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    addlogger = logger.getChild('add')

    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1

    # Try to create row now to avoid unnecessary validation if duplicate
    try:
        cxn.execute(
            "INSERT INTO providers(name, lastHarvest) values (?, ?)",
            (args.name, datetime.fromtimestamp(0)))
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name))
        return 1
    addlogger.info('Adding provider "{0}"'.format(args.name))

    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
    # Checked whether the URL came from the prompt or from the command line.
    if not args.url:
        addlogger.critical('Base URL for new provider not supplied')
        # Discard the row inserted above; the provider is incomplete.
        cxn.rollback()
        return 1

    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)

    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        # Discard the row inserted above; the provider is unusable.
        cxn.rollback()
        return 1

    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
    if args.dest:
        # Expand user dir
        args.dest = os.path.expanduser(args.dest)
    else:
        addlogger.info('Destination for data for new provider not supplied'
                       ' using default `pwd`: {0}'.format(os.getcwd()))
        args.dest = os.getcwd()

    # metadataPrefix
    # Check that selected metadataPrefix is available from provider
    # Fetch list of available formats, keyed by prefix; the value holds the
    # remaining items of each entry returned by the provider.
    mdps = dict(
        (mdpinfo[0], mdpinfo[1:]) for mdpinfo in client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        # Parenthesised single-argument prints produce the same output as the
        # Python 2 print statement and also parse on Python 3.
        print("Available metadataPrefix values:")
        # List available formats
        for mdp in mdps:
            print('%s - %s' % (mdp, mdps[mdp][1]))
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied. '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'

    cxn.execute(
        "UPDATE providers SET "
        "url=?, "
        "destination=?, "
        "metadataPrefix=? "
        "WHERE name=?",
        (args.url, args.dest, args.metadataPrefix, args.name))
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url, args.metadataPrefix,
                             datetime.fromtimestamp(0)))
    # All done, commit database
    cxn.commit()
    return 0
# Aliased so that whatever the file already binds to ``pprint`` (module or
# function) is left untouched.
from pprint import pprint as _pretty_print

# Register one reader per metadata prefix requested further down.
registry = metadata.MetadataRegistry()
registry.registerReader('oai_dc', metadata.oai_dc_reader)
registry.registerReader('marc21', marcxml_reader)

#### OAI-PMH Client processing
oai = Client('http://snape.mzk.cz/OAI-script', registry)

# NOTE(review): ``id`` shadows the builtin; the name is kept because code
# after this section may still refer to it.
id = oai.identify()
print(id.repositoryName())
print(id.adminEmails())
print(id.baseURL())

formats = oai.listMetadataFormats()
# Was ``pprint formats``, which is not valid Python syntax.
_pretty_print(formats)
# 'marc21'

sets = oai.listSets()
for s in sets:
    print(s)
# 'MZK03'

recids = oai.listIdentifiers(metadataPrefix='marc21', set='MZK03')
# from_='2003-01-01T00:00:00Z', until=''
# for example: 'MZK03-907223' is in the list of maps
# or 356050 *not a map