def setUp(self):
    """Load every poetry fixture plus the poet fixture before each test.

    Populates ``self.poetry`` with one ``TestPoetryBook`` per entry in
    ``self.FIXTURES``, keyed by the part of the file name before the first
    dot, and ``self.poet`` with a ``Poet`` parsed from ``self.POET_STRING``.
    """
    # poetry books, keyed by fixture base name
    self.poetry = {}
    for fixture_name in self.FIXTURES:
        key = fixture_name.split('.')[0]
        fixture_path = path.join(exist_fixture_path, fixture_name)
        self.poetry[key] = xmlmap.load_xmlobject_from_file(fixture_path,
                                                           TestPoetryBook)

    # poet fixture (docAuthor)
    self.poet = xmlmap.load_xmlobject_from_string(self.POET_STRING, Poet)
def handle(self, cards_fname, image_dir, dry_run=False, **options):
    """Ingest postcard master images described in a TEI file.

    For every figure in the TEI document a master TIFF is looked up in
    ``image_dir`` (``<entity>.tif`` first, then ``wwi_<entity>.tif``).  When
    one is found, a new ``ImageObject`` is built with Dublin Core metadata
    derived from the figure (identifier, title, description, floating text,
    subjects, image type, coverage), related to the postcard collection and,
    unless ``dry_run`` is set, saved to the repository.

    :param cards_fname: path of the TEI file describing the postcards
    :param image_dir: directory containing the master ``.tif`` files
    :param dry_run: when true, build and report the objects but do not save
    :param options: must contain ``verbosity`` (0 = minimal, 1 = normal,
        2 = all); may contain ``username`` and ``password``, which are
        passed to ``Repository`` when present
    :raises Exception: if the postcard collection object does not exist in
        the repository
    """
    verbosity = int(options['verbosity'])    # 1 = normal, 0 = minimal, 2 = all
    v_normal = 1

    # only hand over the credentials that were actually supplied
    repo_args = {}
    for key in ('username', 'password'):
        if options.get(key) is not None:
            repo_args[key] = options[key]

    repo = Repository(**repo_args)
    collection = PostcardCollection.get()
    if not collection.exists:
        raise Exception(collection.pid + " is not in the repository. Do you need to syncrepo?")

    # LC subjects keyed by a single ana id
    ana_lcs = {
        "nat-it": "World War, 1914-1918--Italy",
        "nat-fr": "World War, 1914-1918--France",
        "nat-us": "World War, 1914-1918--United States",
        "nat-de": "World War, 1914-1918--Germany",
        "nat-brit": "World War, 1914-1918--Great Britain",
        "nat-bel": "World War, 1914-1918--Belgium",
        "nat-au": "World War, 1914-1918--Austria",
        "nat-nl": "World War, 1914-1918--Netherlands",
        "nat-rus": "World War, 1914-1918--Russia",
        # a stray leading space in this value was removed so the heading
        # is formatted like every other nation
        "nat-jp": "World War, 1914-1918--Japan",
        "nat-ee": "World War, 1914-1918--Eastern Europe",
        "nat-ca": "World War, 1914-1918--Canada",
        "nat-hu": "World War, 1914-1918--Hungary",
        "mil-nur": "Military Nursing",
        "con-h": "World War, 1914-1918--Humor",
        "con-v": "World War, 1914-1918--Poetry",
        "con-p": "World War, 1914-1918--Persons",
        "con-m": "World War, 1914-1918--Memorials",
        "con-r": "World War, 1914-1918--Destruction and pillage",
        "con-f": "Flags in art",
        "con-el": "Uncle Elmer",
        "hf-p": "World War, 1914-1918--Propaganda",
        "hf-c": "World War, 1914-1918--Children",
        "hf-w": "World War, 1914-1918--Women",
        "hf-re": "World War, 1914-1918--Religious aspects",
        "hf-ro": "World War, 1914-1918--Man-Woman relationships",
    }
    # LC subjects that need two ana ids: a nation plus "mil-a" (army)
    ana_lcc_army = {
        "nat-fr": u"France. Arm\xe9e",
        "nat-brit": "Great Britain. Army",
        "nat-bel": u"Belgium. Arm\xe9e",
        "nat-de": "Germany. Heer",
        "nat-us": "United States. Army",
        "nat-ca": "Canada. Canadian Army",
        "nat-jp": "Japan. Rikugun",
        "nat-au": u"Austria. Arm\xe9e",
    }
    # ... or a nation plus "mil-na" (navy)
    ana_lcc_navy = {
        # NOTE(review): word order differs from the other entries
        # ("<nation>. <service>") - confirm the intended heading
        "nat-brit": "Royal Navy. Great Britain",
        "nat-us": "United States. Navy",
        "nat-fr": "France. Marine",
        "nat-de": "Germany. Kriegsmarine",
        "nat-ca": "Canada. Royal Canadian Navy",
    }
    # image kinds, stored in dc:type
    ana_lcimage = {
        "im-ph": "photograph",
        "im-pa": "painting",
        "im-dr": "drawing",
        "im-ca": "cartoon",
        "im-en": "engraving",
        "im-po": "poster",
        "im-s": "silk postcard",
    }
    # time periods, stored in dc:coverage
    ana_lccoverage = {
        "t-wwi": "1914-1918",
        "t-pre": "Before 1914",
        "t-post": "After 1918",
        "t-ww2": "1939-1945",
        "t-post2": "After 1945",
    }

    # make a dictionary of subjects so type and value is easily accessible by id
    subjects = {}
    for group in collection.interp.content.interp_groups:
        for interp in group.interp:
            subjects[interp.id] = (group.type, interp.value)

    cards_tei = load_xmlobject_from_file(cards_fname, xmlclass=Tei)
    cards = cards_tei.body.all_figures
    files = 0
    ingested = 0
    for c in cards:
        # locate the master image: plain entity name first, then wwi_ prefix
        master_path = None
        for name_pattern in ('%s.tif', 'wwi_%s.tif'):
            candidate = os.path.join(image_dir, name_pattern % c.entity)
            if os.access(candidate, os.F_OK):
                master_path = candidate
                break
        if master_path is None:
            if verbosity >= v_normal:
                print("File not found for %s" % c.entity)
            continue
        if verbosity >= v_normal:
            print("Found master file %s for %s" % (master_path, c.entity))
        files += 1

        obj = repo.get_object(type=ImageObject)
        dc = obj.dc.content
        dc.identifier_list.append(c.entity)    # Store local identifiers in DC
        obj.label = c.head
        obj.owner = settings.FEDORA_OBJECT_OWNERID
        dc.title = obj.label
        # prefix with a label so the postcard description can be identified
        # among the description elements
        dc.description_list.append('%s%s' % (settings.POSTCARD_DESCRIPTION_LABEL,
                                             c.description))

        # Add floating text from postcards (text written on the card).
        # Only cards that actually carry floating text get this description.
        f_text = c.floatingText
        if len(f_text) > 0:
            f_text = f_text[0]
            float_lines = []    # list of lines of text from the postcard
            if f_text.head:
                float_lines.append(f_text.head)
            if len(f_text.line_group) > 0:
                for group in f_text.line_group:
                    if group.head is not None:
                        # treat head as normal line
                        float_lines.append(group.head)
                    float_lines.extend(group.line)
                    # each linegroup needs an extra \n at the end to make
                    # a paragraph
                    float_lines.append('\n')
            elif len(f_text.line) > 0:
                float_lines.extend(f_text.line)
            # convert every line to unicode and join with line breaks
            float_text = "\n".join(map(unicode, float_lines))
            # prefix with a label so the floating text can be identified
            # among the description elements
            dc.description_list.append('%s%s' % (settings.POSTCARD_FLOATINGTEXT_LABEL,
                                                 float_text))

        # a figure without an ana attribute simply has no ids
        ana_ids = (c.ana or '').split()

        # convert interp text into dc: subjects
        local_subjects = []
        for ana_id in ana_ids:
            if ana_id in subjects:
                local_subjects.append('%s: %s' % subjects[ana_id])
            else:
                print('ana id %s not recognized for %s' % (ana_id, c.entity))
        dc.subject_list.extend(local_subjects)

        if verbosity > v_normal:
            print('DEBUG: %s are the ana ids for %s' % (ana_ids, c.entity))

        # LC subjects.  Values are looked up by exact key: a substring
        # match would let "t-post" answer for "t-post2".
        has_army = "mil-a" in ana_ids
        has_navy = "mil-na" in ana_ids
        lc_subjects = []
        for ana_id in ana_ids:
            if has_army and ana_id in ana_lcc_army:
                ana_lc = ana_lcc_army[ana_id]
                lc_subjects.append(ana_lc)
                print('%s added to LC subjects list-army or navy' % ana_lc)
            if has_navy and ana_id in ana_lcc_navy:
                ana_lc = ana_lcc_navy[ana_id]
                lc_subjects.append(ana_lc)
                print('%s added to LC subjects list-army or navy' % ana_lc)
            if ana_id in ana_lcs:
                ana_lc = ana_lcs[ana_id]
                lc_subjects.append(ana_lc)
                print('%s added to LC subjects list-nat, mil-nur, con, hf' % (ana_lc))
        dc.subject_list.extend(lc_subjects)

        # image types get their own list so that LC subjects do not end up
        # in dc:type
        image_types = []
        for ana_id in ana_ids:
            if ana_id in ana_lcimage:
                ana_image = ana_lcimage[ana_id]
                image_types.append(ana_image)
                print('%s added to LC subjects list-image type' % (ana_image))

        # coverage likewise gets only the time periods
        coverage = []
        for ana_id in ana_ids:
            if ana_id in ana_lccoverage:
                ana_cover = ana_lccoverage[ana_id]
                coverage.append(ana_cover)
                print('%s added to LC subjects list-coverage' % ana_cover)
        dc.coverage_list.extend(coverage)

        # common DC for all postcards.
        # NOTE(review): the previous code assigned the scalar ``type`` field
        # twice ('image', then 'postcard'), so 'image' was never stored;
        # presumably the scalar assignment also replaced the first dc:type
        # entry - confirm against eulxml.  Both values are now added via the
        # list, with 'postcard' first so that the first dc:type is still
        # 'postcard'.
        dc.type_list.extend(['postcard', 'image'] + image_types)
        dc.relation_list.extend([settings.RELATION,
                                 'http://beck.library.emory.edu/greatwar/'])

        # add relation to postcard collection
        obj.rels_ext.content.add((
            URIRef(obj.uri),
            URIRef(MEMBER_OF_COLLECTION),
            URIRef(PostcardCollection.get().uri)
        ))
        # TODO: OAI identifier ?

        # The master is binary data, so open it in binary mode; keep it open
        # until the object has been saved, then close it.
        with open(master_path, 'rb') as master:
            # set file as content of image datastream
            obj.image.content = master

            if verbosity > v_normal:
                print("Dublin Core\t\n%s" % obj.dc.content.serialize(pretty=True))
                print("RELS-EXT \t\n%s" % obj.rels_ext.content.serialize(pretty=True))

            if not dry_run:
                obj.save()
                print("ingested %s as %s" % (unicode(c.head).encode('latin-1'), obj.pid))
                ingested += 1

    # summarize what was done
    print("Found %d postcards " % len(cards))
    print("Found %d postcard files " % files)
    print("Ingested %d postcards " % ingested)
def setUp(self):
    """Load the account fixture and keep its first folder and message.

    Sets ``self.account`` from ``self.FIXTURE_FILE``, ``self.folder`` to the
    account's first folder and ``self.message`` to that folder's first
    message.
    """
    self.account = load_xmlobject_from_file(self.FIXTURE_FILE, cerp.Account)
    first_folder = self.account.folders[0]
    self.folder = first_folder
    self.message = first_folder.messages[0]
def setUp(self):
    """Load the fixture file as an ``EncodedArchivalDescription`` into ``self.ead``."""
    ead_class = eadmap.EncodedArchivalDescription
    self.ead = load_xmlobject_from_file(self.FIXTURE_FILE, ead_class)
def setUp(self):
    """Load the fixture file as an ``ExtendedTei`` document into ``self.tei``."""
    fixture_path = self.FIXTURE_FILE
    self.tei = load_xmlobject_from_file(fixture_path, ExtendedTei)