Ejemplo n.º 1
0
def list_metadata_formats(target, identifier):
    if target is not None:
        client = Client(target['url'], registry)
        metadata_formats = client.listMetadataFormats(identifier=identifier)
        results = []
        if metadata_formats is not None:
            for metadata_format in metadata_formats:
                results.append(convert_metadata_formats(metadata_format))
        return results
Ejemplo n.º 2
0
def list_metadata_formats(target, identifier):
    if target is not None:
        client = Client(target['url'], registry)
        metadata_formats = client.listMetadataFormats(identifier=identifier)
        results = []
        if metadata_formats is not None:
            for metadata_format in metadata_formats:
                results.append(convert_metadata_formats(metadata_format))
        return results
Ejemplo n.º 3
0
def get_client(url, transforms):
    transforms = fix_transforms(transforms)
    registry = MetadataRegistry()
    c = Client(url, registry)
    metadata = c.listMetadataFormats()
    metadata[0] = [
        'fbb', 'http://www.kulturarv.dk/fbb/fbb.xsd', 'http://www.kulturarv.dk/fbb']
    namespaces = dict((x[0], x[2]) for x in metadata)
    fields = dict((transform['field'], ('textList', transform['path']))
                  for transform in transforms)
    namespace = metadata[0][0]
    print namespaces,fields
    registry.registerReader(namespace, MetadataReader(fields=fields, namespaces=namespaces))
    return c, namespace
Ejemplo n.º 4
0
def get_client(url, transforms):
    transforms = fix_transforms(transforms)
    registry = MetadataRegistry()
    c = Client(url, registry)
    metadata = c.listMetadataFormats()
    metadata[0] = [
        'fbb', 'http://www.kulturarv.dk/fbb/fbb.xsd',
        'http://www.kulturarv.dk/fbb'
    ]
    namespaces = dict((x[0], x[2]) for x in metadata)
    fields = dict((transform['field'], ('textList', transform['path']))
                  for transform in transforms)
    namespace = metadata[0][0]
    print namespaces, fields
    registry.registerReader(
        namespace, MetadataReader(fields=fields, namespaces=namespaces))
    return c, namespace
Ejemplo n.º 5
0
def add_provider(cxn, args):
    """Add a new provider to the registry database.
    
    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).
    
    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    global logger, MAX_NAME_LENGTH
    addlogger = logger.getChild('add')
    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1
    # Try to create row now to avoid unnecessary validation if duplicate
    try:
        cxn.execute("INSERT INTO providers(name, lastHarvest) values "
                         "(?, ?)",
                         (args.name, datetime.fromtimestamp(0))
        )
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name)
                           )
        return 1
    else:
        addlogger.info('Adding provider "{0}"'.format(args.name))
    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
        if not args.url:
            addlogger.critical('Base URL for new provider not supplied')
            return 1
    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)
    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        return 1
    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
        if args.dest:
            # Expand user dir
            args.dest = os.path.expanduser(args.dest)
        else:
            addlogger.info('Destination for data for new provider not supplied'
                           ' using default `pwd`: {0}'.format(os.getcwd())
                           )
            args.dest = os.getcwd()
    # metadataPrefix
    # Check that selected metadataPrefix is available from provider
    # Fetch list of available formats
    mdps = dict((mdpinfo[0], mdpinfo[1:])
                    for mdpinfo in
                    client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        print "Available metadataPrefix values:"
        # List available formats
        for mdp in mdps:
            print mdp, '-', mdps[mdp][1]
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied. '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'
    cxn.execute("UPDATE providers SET "
                     "url=?, "
                     "destination=?, "
                     "metadataPrefix=? "
                     "WHERE name=?",
                     (args.url,
                      args.dest,
                      args.metadataPrefix,
                      args.name
                      )
    )
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url,
                             args.metadataPrefix,
                             datetime.fromtimestamp(0)
                             )
                   )
    # All done, commit database
    cxn.commit()
    return 0
Ejemplo n.º 6
0
def add_provider(cxn, args):
    """Add a new provider to the registry database.
    
    Process ``args`` to add a new provider to the registry database. Return 0
    for success, 1 for failure (error message should be logged).
    
    ``cxn`` => instance of ``sqlite3.Connection``
    ``args`` => instance of ``argparse.Namespace``
    """
    global logger, MAX_NAME_LENGTH
    addlogger = logger.getChild('add')
    # Validate name
    if len(args.name) > MAX_NAME_LENGTH:
        addlogger.critical('Short name for new provider must be no more than '
                           '{0} characters long'.format(MAX_NAME_LENGTH))
        return 1
    elif args.name.startswith(('http://', 'https://')) or args.name == 'all':
        addlogger.critical('Short name for new provider may not be "all" nor '
                           'may it begin "http://" or "https://"')
        return 1
    # Try to create row now to avoid unnecessary validation if duplicate
    try:
        cxn.execute(
            "INSERT INTO providers(name, lastHarvest) values "
            "(?, ?)", (args.name, datetime.fromtimestamp(0)))
    except sqlite3.IntegrityError:
        addlogger.critical('Unable to add provider "{0}"; '
                           'provider with this name already exists'
                           ''.format(args.name))
        return 1
    else:
        addlogger.info('Adding provider "{0}"'.format(args.name))
    # Get any missing information
    # Base URL
    if args.url is None:
        args.url = raw_input('Base URL:'.ljust(20))
        if not args.url:
            addlogger.critical('Base URL for new provider not supplied')
            return 1
    # Set up an OAI-PMH client for validating providers
    md_registry = MetadataRegistry()
    md_registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(args.url, md_registry)
    # Validate Base URL by fetching Identify
    try:
        client.identify()
    except (XMLSyntaxError, HTTPError):
        addlogger.critical('Base URL for new provider does not return a valid '
                           'response to an `Identify` request')
        return 1
    # Destination
    if args.dest is None:
        args.dest = raw_input('Destination directory: '.ljust(20))
        if args.dest:
            # Expand user dir
            args.dest = os.path.expanduser(args.dest)
        else:
            addlogger.info('Destination for data for new provider not supplied'
                           ' using default `pwd`: {0}'.format(os.getcwd()))
            args.dest = os.getcwd()
    # metadataPrefix
    # Check that selected metadataPrefix is available from provider
    # Fetch list of available formats
    mdps = dict(
        (mdpinfo[0], mdpinfo[1:]) for mdpinfo in client.listMetadataFormats())
    while args.metadataPrefix not in mdps:
        print "Available metadataPrefix values:"
        # List available formats
        for mdp in mdps:
            print mdp, '-', mdps[mdp][1]
        args.metadataPrefix = raw_input('metadataPrefix [oai_dc]:'.ljust(20))
        if not args.metadataPrefix:
            addlogger.info('metadataPrefix for new provider not supplied. '
                           'using default: oai_dc')
            args.metadataPrefix = 'oai_dc'
    cxn.execute(
        "UPDATE providers SET "
        "url=?, "
        "destination=?, "
        "metadataPrefix=? "
        "WHERE name=?", (args.url, args.dest, args.metadataPrefix, args.name))
    addlogger.info('URL for next harvest: {0}?verb=ListRecords'
                   '&metadataPrefix={1}'
                   '&from={2:%Y-%m-%dT%H:%M:%SZ%z}'
                   ''.format(args.url, args.metadataPrefix,
                             datetime.fromtimestamp(0)))
    # All done, commit database
    cxn.commit()
    return 0
Ejemplo n.º 7
0
registry = metadata.MetadataRegistry()
registry.registerReader('oai_dc', metadata.oai_dc_reader)
registry.registerReader('marc21', marcxml_reader)


#### OAI-PMH Client processing 

oai = Client('http://snape.mzk.cz/OAI-script', registry)

id = oai.identify()
print id.repositoryName()
print id.adminEmails()
print id.baseURL()

formats = oai.listMetadataFormats()
pprint formats

# 'marc21'

sets = oai.listSets()
for s in sets:
	print s

# 'MZK03'

recids = oai.listIdentifiers(metadataPrefix='marc21', set='MZK03') # from_='2003-01-01T00:00:00Z', until=''

# for example: 'MZK03-907223' is in the list of maps
# or 356050 *not a map