Esempio n. 1
0
def main():
  # Visible portion of this driver: build a lookup table from the hardlinks
  # file, then, for every configured collection, fetch its items with
  # omnux.extract_items, run each item through
  # omnux.transform_omeka_to_ucldc and pretty-print the resulting payload.
  #
  # Uses names defined outside this function: hardlinks, api_url,
  # omnux_fieldmap_json, collection_mapping_json and corpnames.

  # Collection ids to process. The commented-out assignments below are
  # alternative selections kept for manual switching.
  # NOTE(review): 0 presumably selects all collections -- confirm against omnux.
  collection_ids = [0]
  #collection_ids = [13,10,6,9,14,8]
  #collection_ids = [14] # Bob Day
  #collection_ids = [13] # 30th General Hospital
  #collection_ids = [8] # speck
  #collection_ids = [9] # School_of_Dentistry_130
  nx = utils.Nuxeo()  # NOTE(review): not referenced in the visible part of this function
  pp = pprint.PrettyPrinter()

  # Parse the hardlinks file into a dict: each line is stripped of its
  # trailing newline and split on a single space; field 0 is the key and
  # field 1 the value.
  # NOTE(review): a blank line or a line without a space raises IndexError.
  links = {} 
  with open(hardlinks, "r") as h:
    for line in h:
      line = line.rstrip('\n')
      line = line.split(' ')
      links[line[0]] = line[1]
  
  for collection_id in collection_ids:
      # get items metadata for this collection from the API at api_url
      items_metadata = omnux.extract_items(api_url, collection_id)
      # Python 2 print statement: reports how many items were fetched.
      print 'Number of items in items_metadata', collection_id, ':', len(items_metadata)
 
      # transform each item and pretty-print the resulting payload
      for item in items_metadata:
        payload = omnux.transform_omeka_to_ucldc(item, collection_id, omnux_fieldmap_json, collection_mapping_json, links, corpnames)
        pp.pprint(payload)
        '''
Esempio n. 2
0
def main():
    """Write one file-name listing per collection.

    For every id in ``collection_ids`` the collection's "name" property is
    looked up in the mapping loaded from ``./ucsf_map.json`` and used as the
    output file name inside ``outputdir``. Each output file is recreated on
    every run and receives one line per item: the value returned by
    ``omnux.get_item_filename`` for that item.

    Relies on the names ``api_url`` and ``outputdir`` defined outside this
    function.
    """
    collection_ids = [13, 10, 6, 9, 14, 8]
    # NOTE(review): ``nx`` is never used below. The call is kept in case the
    # constructor has side effects -- confirm, then drop it.
    nx = utils.Nuxeo()

    # The mapping file is the same for every collection, so parse it once
    # instead of re-reading it on each loop iteration.
    with open("./ucsf_map.json") as cf:
        collection_mapping_data = json.load(cf)

    for collection_id in collection_ids:
        collection_name = omnux.get_collection_property(
            collection_mapping_data, collection_id, "name")
        output_file = os.path.join(outputdir, collection_name)

        items_metadata = omnux.extract_items(api_url, collection_id)

        # Mode "w" discards any listing left over from a previous run, which
        # is what the former remove-then-append sequence achieved.
        with open(output_file, "w") as f:
            for item in items_metadata:
                path = omnux.get_item_filename(item)
                f.write(path + "\n")
Esempio n. 3
0
def main():
    """Export an identifier map and the distinct sources of all items.

    Fetches the item metadata for collection id 0 and scans every item's
    ``element_texts``. The text of an 'Identifier' element becomes the item's
    key and the text of a 'Source' element its source; when an item carries
    several elements of the same name, the last one wins.

    Two files are written to the current working directory:

    * ``ucsf_sources.json`` -- UTF-8 text, one distinct source per line, in
      sorted order (plain lines, not a JSON document).
    * ``ucsf_id_map.json`` -- JSON object mapping each non-empty identifier
      to ``{'omeka_id': ..., 'source': ...}``.

    Relies on the name ``API_URL`` defined outside this function.
    """
    metadata = {}
    # A set de-duplicates in O(1) per source; ordering is applied when the
    # sources are written, so insertion order does not matter.
    sources = set()

    collection_id = 0  # all collections
    items_metadata = omnux.extract_items(API_URL, collection_id)

    for item in items_metadata:
        omeka_id = item['id']
        dc_identifier = ''
        source = ''
        for element in item['element_texts']:
            # Only the text and the element name are needed here.
            text, _element_set_name, element_name = omnux.get_element_text(element)
            if element_name == 'Identifier':
                dc_identifier = text
            elif element_name == 'Source':
                source = text
                sources.add(source)

        # Items without an identifier cannot be keyed and are left out of
        # the map (their source is still recorded above).
        if dc_identifier:
            metadata[dc_identifier] = {'omeka_id': omeka_id, 'source': source}

    with codecs.open('ucsf_sources.json', 'w', 'utf-8') as fp:
        for s in sorted(sources):
            fp.write(s)
            fp.write('\n')

    # Serialize before opening the file so a serialization error does not
    # leave a truncated map behind.
    j = json.dumps(metadata, indent=4)
    with open('ucsf_id_map.json', 'w') as fp:
        fp.write(j)
Esempio n. 4
0
#! /usr/bin/env python
#coding=utf-8
import omnux
import json
# Root endpoint of the Omeka API this export reads from.
OMEKA_API = u'https://digital.library.ucsf.edu/api/'

# Fetch the item metadata for collection id 15 and save it as indented JSON
# (3-space indent) in the current working directory.
items_metadata = omnux.extract_items(OMEKA_API, 15)
with open('omeka_md_berne.json', 'w') as dump_file:
    dump_file.write(json.dumps(items_metadata, indent=3))