def rendermedia(filecontent):
    """Turn tab-separated media metadata into a list of display dicts.

    The content is first passed through ``deURN``. Its first line is the
    header row naming each column; every following line is one media record.
    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.

    NOTE(review): ``rendermedia`` is defined a second time further down this
    file; at import time the later definition replaces this one.

    Args:
        filecontent: the raw tab-separated text.

    Returns:
        A list with one dict per data row. Each dict always has
        ``'otherfields'`` (a list of ``{'label': ..., 'value': ...}`` dicts)
        and ``'counter'`` (the row's index among all lines after the header,
        skipped lines included), plus whichever of ``'accession'``,
        ``'mainentry'``, ``'csid'``, ``'media'`` and ``'blobs'`` have a
        matching column.
    """
    # Header names whose value is copied onto the media dict under a new key.
    direct_keys = {
        'objectnumber': 'accession',
        'name': 'mainentry',
        'objectCSID': 'csid',
        'mediaCSID': 'media',
    }
    # Header names that become labelled entries in media['otherfields'].
    labelled = {
        'creator': 'Creator',
        'description': 'Description',
        'date': 'Image Date',
    }

    lines = deURN(filecontent).split('\n')
    FIELDS = lines[0].strip().split('\t')
    result = []
    for counter, line in enumerate(lines[1:]):
        stripped = line.strip()
        if stripped == '' or stripped[0] == '#':
            continue
        # Drop trailing whitespace (there may be a stray \r at the end of the
        # line) but keep leading tabs: stripping them would shift every cell
        # one column to the left when the first cell is empty.
        cells = line.rstrip().lstrip(' \r\n').split('\t')
        media = {'otherfields': [], 'counter': counter}
        # zip() stops at the shorter sequence, so a row with more cells than
        # the header no longer raises IndexError.
        for field, value in zip(FIELDS, cells):
            if field in direct_keys:
                media[direct_keys[field]] = value
            elif field == 'blobCSID':
                media['blobs'] = [value]
            elif field in labelled:
                media['otherfields'].append({
                    'label': labelled[field],
                    'value': value
                })
        result.append(media)
    return result
def reformat(filecontent):
    """Render delimited text as a bare-bones HTML table fragment.

    The content is passed through ``deURN``; then every newline starts a new
    table row, every tab or ``|`` starts a new cell, and each occurrence of
    ``False`` is wrapped in an ``error`` span.

    NOTE(review): ``reformat`` is defined again immediately after this
    definition in the file; the later definition is the one that takes effect.

    Args:
        filecontent: the raw delimited text.

    Returns:
        The HTML string, starting with the opening ``<table>`` tag and ending
        with ``</table>``.
    """
    # Applied in this order; none of the replacement texts contains a
    # character that a later rule would match.
    substitutions = (
        ('\n', '<tr><td>'),
        ('\t', '<td>'),
        ('|', '<td>'),
        ('False', '<span class="error">False</span>'),
    )
    body = deURN(filecontent)
    for old, new in substitutions:
        body = body.replace(old, new)
    return '<table width="100%"><tr><td>\n' + body + '</table>'
def reformat(filecontent):
    """Convert delimited text into a simple HTML table fragment.

    After ``deURN`` has been applied, newlines become row starts, tabs and
    ``|`` characters become cell starts, and the word ``False`` is wrapped in
    a span with class ``error``.

    NOTE(review): an identical definition of ``reformat`` precedes this one in
    the file; this later definition is the one that takes effect.

    Args:
        filecontent: the raw delimited text.

    Returns:
        The HTML string: opening ``<table>`` tag, converted content,
        ``</table>``.
    """
    cells = (
        deURN(filecontent)
        .replace('\n', '<tr><td>')
        .replace('\t', '<td>')
        .replace('|', '<td>')
        .replace('False', '<span class="error">False</span>')
    )
    opening = '<table width="100%"><tr><td>\n'
    closing = '</table>'
    return opening + (cells + closing)
def extractTag(xml, tag):
    """Return the text of the first ``tag`` descendant of ``xml``.

    Args:
        xml: an element object offering ``find()`` (ElementTree-style).
        tag: the tag name to look for anywhere below ``xml``.

    Returns:
        The element's text; if the text contains ``"urn:"`` it is passed
        through ``deURN`` first. Returns ``''`` when the element is missing,
        has no text, or the conversion fails.
    """
    element = xml.find('.//%s' % tag)
    # Missing element or empty element: nothing to extract.
    if element is None or element.text is None:
        return ''
    try:
        if "urn:" in element.text:
            return deURN(str(element.text))
        return element.text
    except Exception:
        # Best effort, as before: a value that cannot be converted yields ''.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt/SystemExit
        # propagate.
        return ''
def rendermedia(filecontent):
    """Turn tab-separated media metadata into a list of display dicts.

    The content is first passed through ``deURN``. Its first line is the
    header row naming each column; every following line is one media record.
    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.

    NOTE(review): an earlier definition of ``rendermedia`` exists in this
    file; this later definition is the one that takes effect.

    Args:
        filecontent: the raw tab-separated text.

    Returns:
        A list with one dict per data row. Each dict always has
        ``'otherfields'`` (a list of ``{'label': ..., 'value': ...}`` dicts)
        and ``'counter'`` (the row's index among all lines after the header,
        skipped lines included), plus whichever of ``'accession'``,
        ``'mainentry'``, ``'csid'``, ``'media'`` and ``'blobs'`` have a
        matching column.
    """
    # Header names whose value is copied onto the media dict under a new key.
    direct_keys = {
        'objectnumber': 'accession',
        'name': 'mainentry',
        'objectCSID': 'csid',
        'mediaCSID': 'media',
    }
    # Header names that become labelled entries in media['otherfields'].
    labelled = {
        'creator': 'Creator',
        'description': 'Description',
        'date': 'Image Date',
    }

    lines = deURN(filecontent).split('\n')
    FIELDS = lines[0].strip().split('\t')
    result = []
    for counter, line in enumerate(lines[1:]):
        stripped = line.strip()
        if stripped == '' or stripped[0] == '#':
            continue
        # Drop trailing whitespace (there may be a stray \r at the end of the
        # line) but keep leading tabs: stripping them would shift every cell
        # one column to the left when the first cell is empty.
        cells = line.rstrip().lstrip(' \r\n').split('\t')
        media = {'otherfields': [], 'counter': counter}
        # zip() stops at the shorter sequence, so a row with more cells than
        # the header no longer raises IndexError.
        for field, value in zip(FIELDS, cells):
            if field in direct_keys:
                media[direct_keys[field]] = value
            elif field == 'blobCSID':
                media['blobs'] = [value]
            elif field in labelled:
                media['otherfields'].append({
                    'label': labelled[field],
                    'value': value
                })
        result.append(media)
    return result
def extract_refname(xml, term, pgSz, record_type):
    """Look for ``term`` among the list items of a search-result XML document.

    Args:
        xml: the result document as an XML string; it must contain a
            ``totalItems`` element and may contain ``list-item`` elements.
        term: the value being searched for.
        pgSz: the page size used for the search; when ``totalItems`` exceeds
            it and no item matched, the match may be on a later page.
        record_type: for ``'collectionobjects'`` the display name is derived
            from the item's refName via ``deURN``; for anything else it is
            read from ``termDisplayName`` (falling back to ``displayName``).

    Returns:
        A tuple ``(result, totalItems)`` where ``result`` is a 5-element list
        whose first entry is the status:

        * ``['ZeroResults', 'X', 'X', 'X', 'X']`` - the search found nothing;
        * ``['OK', csid, displayName, refName, updatedAt]`` - first item whose
          normalized display name equals the normalized ``term``;
        * ``['Failed', 'X', 'X', 'X', 'X']`` - an item's fields could not be
          read (the failure is logged via ``loginfo``);
        * ``['MaybeMissed', 'X', 'X', 'X', 'X']`` - no match on this page, but
          there are more results than ``pgSz``;
        * ``['NoMatch', '', term, term, 'X']`` - no match.

    Raises:
        Whatever ``fromstring``/``int`` raise for an unparseable document, and
        AttributeError when ``totalItems`` or an item's ``csid`` is missing.
        These always propagated: the original wrapped the body in
        ``except: raise`` and its ``'xmlParseFailed'`` return was unreachable.
    """
    cspaceXML = fromstring(xml)
    totalItems = int(cspaceXML.find('.//totalItems').text)
    if totalItems == 0:
        return ['ZeroResults', 'X', 'X', 'X', 'X'], totalItems

    for item in cspaceXML.findall('.//list-item'):
        csid = item.find('.//csid').text
        try:
            refName = extract_tag(item, 'refName')
            updated_at = extract_tag(item, 'updatedAt')
            if record_type == 'collectionobjects':
                termDisplayName = deURN(refName)
            else:
                try:
                    termDisplayName = extract_tag(item, 'termDisplayName')
                except Exception:
                    termDisplayName = extract_tag(item, 'displayName')
        except Exception:
            # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit are not reported as a failed lookup.
            loginfo(
                'csvimport',
                'could not get termDisplayName or refName or updatedAt from %s'
                % csid, {}, {})
            return ['Failed', 'X', 'X', 'X', 'X'], totalItems
        if normalize(term) == normalize(termDisplayName):
            return ['OK', csid, str(termDisplayName), refName,
                    updated_at], totalItems

    if totalItems > pgSz:
        return ['MaybeMissed', 'X', 'X', 'X', 'X'], totalItems
    return ['NoMatch', '', term, term, 'X'], totalItems
def updateXML(fieldset, updateItems, xml):
    """Merge the user's edits into a collection object XML record.

    ``fieldset`` selects which field names are considered; for each of those
    that has a non-empty value in ``updateItems`` the matching container
    element is located in the parsed record and the new value is inserted as
    its first child (or moved there if it is already present). A few scalar
    fields (``objectCount``, ``inventoryCount``, ``pahmaTmsLegacyDepartment``,
    ``pahmaFieldLocVerbatim``, ``collection``) are handled after the loop.

    Args:
        fieldset: name of the field group being edited ('keyinfo',
            'namedesc', 'registration', 'hsrinfo', 'objtypecm', 'collection',
            'placeanddate', 'places', 'dates', 'mattax', 'student' or
            'fullmonty').
        updateItems: dict of field name -> new value. It may be modified: an
            empty 'objectNumber' entry is filled in from the record.
        xml: the record as an XML string.

    Returns:
        A tuple ``(message, payload)``: ``message`` is an HTML fragment
        describing some of the changes made, ``payload`` is the XML
        declaration followed by the serialized, updated record.

    NOTE(review): ``alreadyExists`` and ``IsAlreadyPreferred`` are defined
    outside this block; presumably they test whether the value occurs among
    the given elements and whether it is the first of them - confirm.
    """
    message = ''

    # Fields vary with fieldsets
    # NOTE(review): there is no final ``else``; an unrecognised fieldset leaves
    # fieldList unbound and the loop below fails with UnboundLocalError.
    if fieldset == 'keyinfo':
        fieldList = ('pahmaFieldCollectionPlace', 'assocPeople', 'objectName',
                     'pahmaEthnographicFileCode')
    elif fieldset == 'namedesc':
        fieldList = ('briefDescription', 'objectName')
    elif fieldset == 'registration':
        # nb:  'pahmaAltNumType' is handled with  'pahmaAltNum'
        fieldList = ('objectName', 'pahmaAltNum', 'fieldCollector')
    elif fieldset == 'hsrinfo':
        fieldList = ('objectName', 'pahmaFieldCollectionPlace',
                     'briefDescription')
    elif fieldset == 'objtypecm':
        fieldList = ('objectName', 'collection', 'responsibleDepartment',
                     'pahmaFieldCollectionPlace', 'pahmaTmsLegacyDepartment')
    elif fieldset == 'collection':
        fieldList = ('objectName', 'collection')
    elif fieldset == 'placeanddate':
        fieldList = ('objectName', 'pahmaFieldLocVerbatim',
                     'pahmaFieldCollectionDate')
    elif fieldset == 'places':
        fieldList = ('pahmaFieldLocVerbatim', 'pahmaFieldCollectionPlace',
                     'objectProductionPlace', 'contentPlace')
    elif fieldset == 'dates':
        fieldList = ('objectProductionDate', 'pahmaFieldCollectionDate',
                     'contentDate', 'briefDescription')
    elif fieldset == 'mattax':
        fieldList = ('material', 'taxon', 'briefDescription')
    elif fieldset == 'student':
        fieldList = ('taxon', 'fieldLocCountry', 'fieldLocState',
                     'fieldLocCounty')
    elif fieldset == 'fullmonty':
        # NOTE(review): 'objectName' and 'material' each appear twice, so they
        # go through the loop twice - confirm this is intended.
        fieldList = ('assocPeople', 'briefDescription', 'collection',
                     'contentDate', 'contentPlace', 'fieldCollector',
                     'material', 'objectName', 'objectName',
                     'objectProductionDate', 'objectProductionPlace',
                     'objectProductionPerson', 'pahmaAltNum',
                     'pahmaEthnographicFileCode', 'pahmaFieldCollectionDate',
                     'pahmaFieldCollectionPlace', 'pahmaFieldLocVerbatim',
                     'pahmaObjectStatus', 'responsibleDepartment', 'taxon',
                     'material')

    root = etree.fromstring(xml)
    # add the user's changes to the XML
    for relationType in fieldList:
        # sys.stderr.write('tag1: %s\n' % relationType)
        # this app does not insert empty values into anything!
        if not relationType in updateItems.keys(
        ) or updateItems[relationType] == '':
            continue

        # Work out the name of the container element for this field:
        # <field> + extra + listSuffix (e.g. 'assocPeopleGroupList',
        # 'briefDescriptions', 'contentDateGroup', 'collection').
        listSuffix = 'List'
        extra = ''
        if relationType in [
                'assocPeople', 'pahmaAltNum', 'pahmaFieldCollectionDate',
                'objectProductionDate', 'objectProductionPlace',
                'objectProductionPerson', 'contentDate', 'material', 'taxon',
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            extra = 'Group'
        elif relationType in [
                'briefDescription', 'fieldCollector', 'responsibleDepartment',
                'contentPlace'
        ]:
            listSuffix = 's'
        if relationType in [
                'collection', 'pahmaFieldLocVerbatim', 'contentDate',
                'pahmaTmsLegacyDepartment'
        ]:
            listSuffix = ''
        else:
            pass
            # html += ">>> ",'.//'+relationType+extra+'List'
        # sys.stderr.write('tag2: %s\n' % (relationType + extra + listSuffix))

        # Some fields live in elements named differently from the field.
        if relationType == 'taxon':
            tmprelationType = 'taxonomicIdent'
        elif relationType in [
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            tmprelationType = 'locality'
        else:
            tmprelationType = relationType

        metadata = root.findall('.//' + tmprelationType + extra + listSuffix)
        # An empty objectNumber in the update is replaced by the record's own.
        if 'objectNumber' in updateItems and updateItems['objectNumber'] == '':
            updateItems['objectNumber'] = root.find('.//objectNumber').text
        try:
            metadata = metadata[0]  # there had better be only one!
        except:
            # hmmm ... we didn't find this element in the record. Make a note and carry on!
            # message += 'No "' + relationType + extra + listSuffix + '" element found to update.'
            continue
        # html += ">>> ",relationType,':',updateItems[relationType]

        if relationType in [
                'assocPeople', 'objectName', 'pahmaAltNum', 'material',
                'taxon', 'objectProductionPerson', 'objectProductionPlace',
                'fieldLocCountry', 'fieldLocState', 'fieldLocCounty'
        ]:
            # Repeating groups: the value lives in a leaf element inside a
            # <...Group> child of the container.
            # group = metadata.findall('.//'+relationType+'Group')
            # sys.stderr.write('  updateItem: ' + relationType + '::  ' + updateItems[relationType] + '\n' )
            Entries = metadata.findall('.//' + relationType)
            if not alreadyExists(updateItems[relationType], Entries):
                # Reuse the first existing group if there is one, otherwise
                # start a fresh group element.
                try:
                    newElement = metadata.findall('.//' + tmprelationType +
                                                  'Group')[0]
                except:
                    newElement = etree.Element(tmprelationType + 'Group')
                leafElement = etree.Element(relationType)
                leafElement.text = updateItems[relationType]
                newElement.append(leafElement)
                if relationType in ['assocPeople', 'pahmaAltNum']:
                    apgType = etree.Element(relationType + 'Type')
                    # this needs to be a refname for PAHMA's assocpeopletype...
                    apgType.text = updateItems[
                        relationType +
                        'Type'] if relationType == 'pahmaAltNum' else "urn:cspace:pahma.cspace.berkeley.edu:vocabularies:name(assocpeople):item:name(assocpeopletype06)'made by'"
                    # sys.stderr.write(relationType + 'Type:' + updateItems[relationType + 'Type'])
                    newElement.append(apgType)
                if (len(Entries) == 1 and Entries[0].text is None
                    ) or tmprelationType == 'locality':
                    # The only existing entry is empty (or this is a locality
                    # field): drop the existing group(s) and put the new one
                    # first.
                    # sys.stderr.write('reusing empty element: %s\n' % Entries[0].tag)
                    # sys.stderr.write('ents : %s\n' % Entries[0].text)
                    # NOTE(review): children are removed while iterating over
                    # metadata; with several matching siblings some may be
                    # skipped - confirm.
                    for child in metadata:
                        # html += '<br>tag: ', child.tag
                        if child.tag == tmprelationType + 'Group':
                            # html += '<br> found it! ',child.tag
                            metadata.remove(child)
                    metadata.insert(0, newElement)
                else:
                    metadata.insert(0, newElement)
            else:
                if IsAlreadyPreferred(updateItems[relationType], Entries):
                    continue
                else:
                    # exists, but not preferred. make it the preferred: remove it from where it is, insert it as 1st
                    # NOTE(review): savechild stays unbound if no group's leaf
                    # text equals the new value, and checkval is None for a
                    # group without that leaf - confirm neither can happen.
                    for child in metadata:
                        if child.tag == tmprelationType + 'Group':
                            checkval = child.find('.//' + relationType)
                            if checkval.text == updateItems[relationType]:
                                savechild = child
                                metadata.remove(child)
                    metadata.insert(0, savechild)
                    pass
            # for AltNums, we need to update the AltNumType even if the AltNum hasn't changed
            if relationType == 'pahmaAltNum':
                # NOTE(review): find() returns None when there is no
                # pahmaAltNumType element; the assignment below would then
                # raise AttributeError.
                apgType = metadata.find('.//' + relationType + 'Type')
                apgType.text = updateItems[relationType + 'Type']
                # sys.stderr.write('  updated: pahmaAltNumType to' + updateItems[relationType + 'Type'] + '\n' )
        elif relationType in [
                'briefDescription', 'fieldCollector', 'responsibleDepartment',
                'contentPlace'
        ]:
            # Simple repeating fields: the values are direct children of the
            # container.
            Entries = metadata.findall('.//' + relationType)
            # for e in Entries:
            # html += '%s, %s<br>' % (e.tag, e.text)
            # sys.stderr.write(' e: %s\n' % e.text)
            if alreadyExists(updateItems[relationType], Entries):
                if IsAlreadyPreferred(updateItems[relationType], Entries):
                    # message += "%s exists as %s, already preferred;" % (updateItems[relationType],relationType)
                    pass
                else:
                    # exists, but not preferred. make it the preferred: remove it from where it is, insert it as 1st
                    for child in Entries:
                        # NOTE(review): this looks like leftover debug output
                        # on stderr.
                        sys.stderr.write(' c: %s\n' % child.tag)
                        if child.text == updateItems[relationType]:
                            new_element = child
                            metadata.remove(child)
                            # message += '%s removed. len = %s<br/>' % (child.text, len(Entries))
                    metadata.insert(0, new_element)
                    message += "'%s' exists in %s, now preferred.<br/>" % (
                        deURN(updateItems[relationType]), relationType)
                    # html += 'already exists: %s<br>' % updateItems[relationType]
            # check if the existing element is empty; if so, use it, don't add a new element
            else:
                if len(Entries) == 1 and Entries[0].text is None:
                    # message += "removed %s ;<br/>" % (Entries[0].tag)
                    metadata.remove(Entries[0])
                new_element = etree.Element(relationType)
                new_element.text = updateItems[relationType]
                metadata.insert(0, new_element)
                message += "added '%s' as the preferred term in %s.<br/>" % (
                    deURN(updateItems[relationType]), relationType)
        elif relationType in [
                'objectProductionDate', 'pahmaFieldCollectionDate',
                'contentDate'
        ]:
            # we'll be replacing the entire structured date group
            newDateGroup = etree.Element('%sGroup' % relationType)
            new_element = etree.Element('dateDisplayDate')
            new_element.text = updateItems[relationType]
            newDateGroup.insert(0, new_element)
            DateGroup = metadata.find('.//%sGroup' % relationType)
            if DateGroup is not None:
                metadata.remove(DateGroup)
            # one of many special cases...
            if relationType == 'contentDate':
                # For contentDate the container found above is the date group
                # itself, so its contents are cleared and the display date is
                # inserted directly.
                # NOTE(review): findall('.//*') returns all descendants, while
                # remove() only accepts direct children - assumes the group
                # has no nested elements; confirm.
                DateGroup = metadata.findall('.//*')
                [metadata.remove(d) for d in DateGroup]
                metadata.insert(0, new_element)
            else:
                metadata.insert(0, newDateGroup)
        else:
            # check if value is already present. if so, skip
            if alreadyExists(updateItems[relationType],
                             metadata.findall('.//' + relationType)):
                if IsAlreadyPreferred(updateItems[relationType],
                                      metadata.findall('.//' + relationType)):
                    continue
                else:
                    message += "'%s' already exists as an NPT in %s: This value has been inserted as the PT and in doing so is now duplicated.<br/>" % (
                        deURN(updateItems[relationType]), relationType)
                    pass
            newElement = etree.Element(relationType)
            newElement.text = updateItems[relationType]
            metadata.insert(0, newElement)

    # Scalar fields: set the text of the existing element, creating the
    # element at the top of its schema part when the record lacks it.
    objectCount = root.find('.//numberOfObjects')
    if 'objectCount' in updateItems:
        if objectCount is None:
            objectCount = etree.Element('numberOfObjects')
            collectionobjects_common = root.find(
                './/{http://collectionspace.org/services/collectionobject}collectionobjects_common'
            )
            collectionobjects_common.insert(0, objectCount)
        objectCount.text = updateItems['objectCount']

    inventoryCount = root.find('.//inventoryCount')
    if 'inventoryCount' in updateItems:
        if inventoryCount is None:
            inventoryCount = etree.Element('inventoryCount')
            collectionobjects_pahma = root.find(
                './/{http://collectionspace.org/services/collectionobject/local/pahma}collectionobjects_pahma'
            )
            collectionobjects_pahma.insert(0, inventoryCount)
        inventoryCount.text = updateItems['inventoryCount']

    for fld in 'pahmaTmsLegacyDepartment pahmaFieldLocVerbatim'.split(' '):
        if fld in updateItems and updateItems[fld] != '':
            fldtoupdate = root.find('.//' + fld)
            if fldtoupdate is None:
                fldtoupdate = etree.Element(fld)
                collectionobjects_pahma = root.find(
                    './/{http://collectionspace.org/services/collectionobject/local/pahma}collectionobjects_pahma'
                )
                collectionobjects_pahma.insert(0, fldtoupdate)
            fldtoupdate.text = updateItems[fld]

    collection = root.find('.//collection')
    if 'collection' in updateItems:
        if collection is None:
            collection = etree.Element('collection')
            collectionobjects_common = root.find(
                './/{http://collectionspace.org/services/collectionobject}collectionobjects_common'
            )
            collectionobjects_common.insert(0, collection)
            message += " %s added as <%s>.<br/>" % (deURN(
                updateItems['collection']), 'collection')
        collection.text = updateItems['collection']

    # Serialize the modified tree; encoding='unicode' makes tostring return a
    # str, so the XML declaration is prepended by hand.
    payload = '<?xml version="1.0" encoding="UTF-8"?>\n' + etree.tostring(
        root, encoding='unicode')
    # update collectionobject..
    # html += "<br>pretending to post update to %s to REST API..." % updateItems['objectCsid']
    # elapsedtimetotal = time.time()
    # messages = []
    # messages.append("posting to %s REST API..." % uri)
    # print(payload)
    # messages.append(payload)

    return message, payload