def main(argv):
    """
    Re-point every object using the "ilives:pageCModel" content model at
    "islandora:pageCModel".

    @param argv Command line arguments (not used by this function).
    @return 1 when the Fedora connection could not be established, otherwise
            nothing.
    """
    fedora = connectToFedora("localhost", "fedoraAdmin", "password")
    if not fedora:
        print("Failed to connect to fedora instance")
        return 1

    ### SCAN FOR OBJECTS
    old_model = "ilives:pageCModel"
    new_model = "islandora:pageCModel"
    pids = getPidsForContentModel(old_model)
    print("Found %d objects to update" % len(pids))

    # (alias, uri) pairs turned into rels_namespace objects for each object
    namespace_uris = (
        ('fedora', 'info:fedora/fedora-system:def/relations-external#'),
        ('fedora-model', 'info:fedora/fedora-system:def/model#'),
    )

    for pid in pids:
        bare_pid = pid.replace('info:fedora/', '')
        # Python 2 print statement: the trailing comma suppresses the newline
        # so the status printed below lands on the same line.
        print(bare_pid) + " ...",
        try:
            obj = fedora.getObject(bare_pid)
        except FedoraConnectionException:
            print("Failed to connect to object %s" % pid)
            continue

        nsmap = [
            fedora_relationships.rels_namespace(alias, uri)
            for alias, uri in namespace_uris
        ]
        rels_ext = fedora_relationships.rels_ext(obj, nsmap, 'fedora')
        editRelsExt(rels_ext, ["fedora-model", "hasModel"], new_model)
        commitRelsExt(rels_ext)
        print("Done")
def main(argv):
    """
    Switch book objects from the "archiveorg:bookCModel" content model to
    "islandora:bookCModel".

    Note: pageNS:pageProgression is not corrected here. It only needs
    changing when it is present, so its presence has to be detected before
    any fix is attempted.

    @param argv Command line arguments (not used by this function).
    @return 1 when the Fedora connection could not be established, otherwise
            nothing.
    """
    # NOTE(review): the connection string below looks like a redacted
    # credential URL - confirm the real value before running.
    fedora = connectToFedora("http://*****:*****@@dmin")
    if not fedora:
        print("Failed to connect to fedora instance")
        return 1

    ### SCAN FOR BOOK OBJECTS
    source_model = "archiveorg:bookCModel"
    target_model = "islandora:bookCModel"
    book_pids = getPidsForContentModel(source_model)
    print("Found %d book objects to update" % len(book_pids))

    for pid in book_pids:
        short_pid = pid.replace('info:fedora/', '')
        # Python 2 print statement: the trailing comma keeps the output on
        # the current line so the per-object status follows it.
        print(short_pid) + " ...",
        try:
            book = fedora.getObject(short_pid)
        except FedoraConnectionException:
            print("Failed to connect to object %s" % pid)
            continue

        fedora_ns = fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#')
        model_ns = fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
        rels_ext = fedora_relationships.rels_ext(
            book, [fedora_ns, model_ns], 'fedora')
        editRelsExt(rels_ext, ["fedora-model", "hasModel"], target_model)
        commitRelsExt(rels_ext)
        print("Done")
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None, extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection. If object:parentPid doesn't have the collection content model
    then strange things might happen.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of {alias: uri} namespaces to put in
           the RELS-EXT data. Non-dict or empty values are ignored.
    @param extraRelationships Optional dict of {predicate: value}; every value
           is added to childObject as a literal. Non-dict or empty values are
           ignored.

    By default the object gets: hasModel:contentModel and
    isMemberOfCollection:parentPid
    """
    nsmap = [
        fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#'),
    ]
    # isinstance (rather than an exact type match) so dict subclasses such as
    # OrderedDict are honoured instead of being silently dropped.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for alias, uri in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(alias, uri))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        [contentModel, "pid"])
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'),
        [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for predicate, value in extraRelationships.items():
            rels_ext.addRelationship(predicate, [value, "literal"])

    # Keep retrying while Fedora reports the object as locked by another
    # thread; any other connection error is reported once and we give up.
    # NOTE(review): the retry is unbounded and has no back-off, exactly as
    # before - consider capping it if lock contention can persist.
    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                print(
                    "Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                    % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def islandoraMessage(self, method, message, client):
    """
    React to an Islandora message asking for derivatives to be (re)built.

    'generateDerivatives' runs self.processMessage over every datastream of
    the object. 'regenerateDerivatives' deletes the TN datastream, the
    RELS-INT datastream and every datastream named by a RELS-INT
    relationship, then re-queues the message as 'generateDerivatives'.
    Any other method is ignored.

    @param method The method name carried by the message.
    @param message Dict-like message body; must contain 'pid'.
    @param client Fedora client used to look the object up.
    """
    if method not in ('generateDerivatives', 'regenerateDerivatives'):
        return

    # Without a pid there is nothing to work on. Previously execution carried
    # on and the later message['pid'] lookups raised KeyError from inside the
    # error handler itself.
    if 'pid' not in message:
        self.logger.error("No PID passed in message.")
        return
    pid = message['pid']

    if method == 'generateDerivatives':
        try:
            obj = client.getObject(pid)
            comime = CoallianceMime(obj)
            for dsid in obj:
                self.processMessage(dsid, obj, comime)
        except Exception:
            self.logger.exception('Pid does not exist. Pid %s' % pid)
        self.logger.info('Derivative generation process complete for PID: %s' % pid)
        return

    # method == 'regenerateDerivatives'
    try:
        obj = client.getObject(pid)
        # Deletions are best effort: a datastream that is already missing
        # must not stop the regeneration.
        try:
            obj['TN'].delete()
        except Exception:
            pass
        relsint = rels_int(obj, rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint'), 'coal')
        # Read the relationships before RELS-INT itself is removed.
        relationships = relsint.getRelationships()
        try:
            obj['RELS-INT'].delete()
        except Exception:
            pass
        for relationship in relationships:
            # relationship[2].data is used as a datastream id - presumably
            # the derivative the relationship points at; confirm.
            try:
                obj[relationship[2].data].delete()
            except Exception:
                pass
        self.stomp.send('/topic/islandora', json.dumps(message), {'method' : 'generateDerivatives'})
    except Exception:
        self.logger.exception('Pid does not exist. Pid %s' % pid)
def add_policy_to_rels(obj):
    """
    Mirror the viewing rule of the object's POLICY datastream into RELS-EXT.

    The existing isViewableByUser / isViewableByRole relationships are purged
    and then recreated as literals from the users and roles of the policy's
    viewing rule.

    @param obj The Fedora object; its 'POLICY' datastream is read.
    @return False when the policy cannot be parsed (XacmlException);
            otherwise nothing is returned.
    """
    # TODO: This isn't very efficient. If the users and roles are the same we
    # shouldn't bother updating the rels. It will do for now in testing
    # however.
    log = logging.getLogger(
        'IslandoraListener.coalliance.add_policy_to_rels')
    policy_stream = obj['POLICY']
    try:
        policy = Xacml(policy_stream.getContent().read())
    except XacmlException:
        return False

    islandora_ns = rels_namespace(
        'islandora', 'http://islandora.ca/ontology/relsext#')
    relsext = rels_ext(obj, islandora_ns, 'islandora')

    allowed_users = policy.viewingRule.getUsers()
    allowed_roles = policy.viewingRule.getRoles()
    log.debug("Users in policy: %s." % allowed_users)
    log.debug("Roles in policy: %s." % allowed_roles)

    grants = (
        ('isViewableByUser', allowed_users),
        ('isViewableByRole', allowed_roles),
    )
    # remove the old users and roles first so no duplicates are left behind
    for predicate, _names in grants:
        relsext.purgeRelationships(predicate=predicate)
    for predicate, names in grants:
        for name in names:
            relsext.addRelationship(
                predicate, rels_object(name, rels_object.LITERAL))

    relsext.update()
def add_policy_to_rels(obj):
    """
    Rewrite the RELS-EXT viewing relationships from the object's XACML policy.

    Reads the 'POLICY' datastream, drops the current isViewableByUser and
    isViewableByRole relationships and adds one literal relationship per user
    and per role found in the policy's viewing rule.

    @param obj The Fedora object to update.
    @return False if the policy raises XacmlException while being parsed;
            nothing otherwise.
    """
    # TODO: This isn't very efficient. If the users and roles are the same we
    # shouldn't bother updating the rels. It will do for now in testing
    # however.
    logger = logging.getLogger('IslandoraListener.coalliance.add_policy_to_rels')
    policy_ds = obj['POLICY']
    try:
        raw_policy = policy_ds.getContent().read()
        xacml = Xacml(raw_policy)
    except XacmlException:
        return False

    namespace = rels_namespace('islandora', 'http://islandora.ca/ontology/relsext#')
    relsext = rels_ext(obj, namespace, 'islandora')

    viewers = xacml.viewingRule.getUsers()
    viewer_roles = xacml.viewingRule.getRoles()
    logger.debug("Users in policy: %s." % viewers)
    logger.debug("Roles in policy: %s." % viewer_roles)

    # purge before adding, otherwise the new entries would be duplicates
    for stale_predicate in ('isViewableByUser', 'isViewableByRole'):
        relsext.purgeRelationships(predicate=stale_predicate)

    for viewer in viewers:
        literal = rels_object(viewer, rels_object.LITERAL)
        relsext.addRelationship('isViewableByUser', literal)
    for viewer_role in viewer_roles:
        literal = rels_object(viewer_role, rels_object.LITERAL)
        relsext.addRelationship('isViewableByRole', literal)

    relsext.update()
def test_two_namespace_literal(self):
    # A literal added under a predicate from the second of two registered
    # namespaces must serialise to the expected RDF document.
    expected_xml = """ <rdf:RDF xmlns:coal="http://www.coalliance.org/ontologies/relsint" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#" xmlns:jon="http://jebus/trainstation"> <rdf:Description rdf:about="info:fedora/coccc:2040"> <jon:feezle>JON</jon:feezle> </rdf:Description> </rdf:RDF> """
    namespaces = [
        rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint'),
        rels_namespace('jon', 'http://jebus/trainstation'),
    ]
    rel = fedora_relationship(namespaces)
    predicate = rels_predicate('jon', 'feezle')
    literal = rels_object('JON', rels_object.LITERAL)
    rel.addRelationship('coccc:2040', predicate, literal)

    generated = XmlHelper.mangle(rel.toString())
    expected = XmlHelper.mangle(expected_xml)
    self.assertEqual(generated, expected, 'Generated XML Incorrect')
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None, extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection. If object:parentPid doesn't have the collection content model
    then strange things might happen.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of {alias: uri} namespaces to put in
           the RELS-EXT data. Non-dict or empty values are ignored.
    @param extraRelationships Optional dict of {predicate: value}; every value
           is added to childObject as a pid reference. Non-dict or empty
           values are ignored.

    By default the object gets: hasModel:contentModel and
    isMemberOfCollection:parentPid
    """
    nsmap = [
        fedora_relationships.rels_namespace('fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace('fedora-model', 'info:fedora/fedora-system:def/model#'),
    ]
    # isinstance (rather than an exact type match) so dict subclasses such as
    # OrderedDict are honoured instead of being silently dropped.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for alias, uri in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(alias, uri))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), [contentModel, "pid"])
    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'), [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for predicate, value in extraRelationships.items():
            rels_ext.addRelationship(predicate, [value, "pid"])

    # Keep retrying while Fedora reports the object as locked by another
    # thread; any other connection error is reported once and we give up.
    # NOTE(review): the retry is unbounded and has no back-off, exactly as
    # before - consider capping it if lock contention can persist.
    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                print("Trouble (thread lock) updating obj(%s) RELS-EXT - retrying." % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def fedoraMessage(self, message, obj, client):
    """
    Route a Fedora message to the matching datastream handling.

    @param message Dict-like message; 'method' is always read, 'dsid' is read
           for everything except an ingest.
    @param obj The Fedora object the message refers to.
    @param client Fedora client (not used here).
    """
    comime = CoallianceMime(obj)

    # an ingest means every datastream of the object needs processing
    if message['method'] == 'ingest':
        for datastream_id in obj:
            self.processMessage(datastream_id, obj, comime)
        return

    # a purged datastream only needs its RELS-INT entries cleaned up
    if message['method'] == 'purgeDatastream':
        coal_ns = rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint')
        relsint = rels_int(obj, coal_ns, 'coal')
        relsint.purgeRelationships(subject=message['dsid'])
        relsint.purgeRelationships(object=message['dsid'])
        relsint.update()
        return

    # anything else: only touch the datastream that changed, if one is named
    if message['dsid']:
        self.processMessage(message['dsid'], obj, comime)
def fedoraMessage(self, message, obj, client):
    """
    Handle a Fedora message for obj according to message['method'].

    'ingest' processes every datastream, 'purgeDatastream' removes the purged
    datastream from RELS-INT, and any other method processes the single
    datastream named in message['dsid'] when that value is truthy.

    @param message Dict-like message carrying 'method' and (except for
           ingest) 'dsid'.
    @param obj The Fedora object the message is about.
    @param client Fedora client (not used here).
    """
    comime = CoallianceMime(obj)

    if message['method'] == 'ingest':
        # new object: run the processing for each of its datastreams
        for each_dsid in obj:
            self.processMessage(each_dsid, obj, comime)
    elif message['method'] == 'purgeDatastream':
        # drop every relationship that mentions the purged datastream,
        # first as subject and then as object
        relsint = rels_int(
            obj,
            rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint'),
            'coal')
        relsint.purgeRelationships(subject=message['dsid'])
        relsint.purgeRelationships(object=message['dsid'])
        relsint.update()
    elif message['dsid']:
        # some other change: only the affected datastream is reprocessed
        self.processMessage(message['dsid'], obj, comime)
def setUp(self):
    # Fixture: a RELS-EXT document for cogru:1332 and a rels_ext_string
    # wrapper around it that uses the islandora namespace as its default.
    self.xml = """ <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:fedora="info:fedora/fedora-system:def/relations-external#" xmlns:fedora-model="info:fedora/fedora-system:def/model#" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"> <rdf:Description rdf:about="info:fedora/cogru:1332"> <fedora:isMemberOfCollection rdf:resource="info:fedora/cogru:1130"></fedora:isMemberOfCollection> <fedora-model:hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/cogru:cogruETD"></fedora-model:hasModel> </rdf:Description> </rdf:RDF> """
    islandora_ns = rels_namespace(
        'islandora', 'http://islandora.ca/ontology/relsext#')
    self.relsext = rels_ext_string(
        'cogru:1332', islandora_ns, 'islandora', self.xml)
def dispatch(self, dsid):
    """
    Run the handler method named after the datastream's mimetype.

    The mimetype has '-', '/', '+' and '.' replaced by '_' and the result is
    looked up as an attribute of self; self.mimetype_none is used when no
    such attribute exists. Nothing is called when the mimetype is empty.

    @param dsid The id of the datastream to dispatch on.
    """
    coal_ns = rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint')
    self.relsint = rels_int(self.obj, coal_ns, 'coal')
    self.dsid = dsid
    try:
        # map - / + . onto _ so the mimetype becomes a valid method name
        to_identifier = string.maketrans('-/+.', '____')
        if not self.obj[dsid].mimeType:
            return
        ascii_mime = self.obj[dsid].mimeType.encode('ascii')
        # look the handler up on self and invoke it
        handler = getattr(self, ascii_mime.translate(to_identifier), self.mimetype_none)
        handler()
    except KeyError:
        # .mimeType raises KeyError when no mimeType is defined.
        # NOTE(review): a KeyError raised inside the handler is swallowed
        # here as well.
        pass
def islandoraMessage(self, method, message, client):
    """
    React to an Islandora message asking for derivatives to be (re)built.

    'generateDerivatives' runs self.processMessage over every datastream of
    the object and, when the object has no 'TN' datastream afterwards, tries
    to create one from the first datastream whose id ends in '-tn.jpg'.
    'regenerateDerivatives' deletes the TN datastream, the RELS-INT
    datastream and every datastream named by a RELS-INT relationship, then
    re-queues the message as 'generateDerivatives'. Any other method is
    ignored.

    @param method The method name carried by the message.
    @param message Dict-like message body; must contain 'pid'.
    @param client Fedora client used to look the object up.
    """
    if method not in ('generateDerivatives', 'regenerateDerivatives'):
        return

    # Without a pid there is nothing to work on. Previously execution carried
    # on and the later message['pid'] lookups raised KeyError from inside the
    # error handler itself.
    if 'pid' not in message:
        self.logger.error("No PID passed in message.")
        return
    pid = message['pid']

    if method == 'generateDerivatives':
        try:
            obj = client.getObject(pid)
            comime = CoallianceMime(obj)
            for dsid in obj:
                self.processMessage(dsid, obj, comime)
            if 'TN' not in obj:
                self.logger.info('No TN datastream on object, attempting to find a derivative to copy')
                for dsid in obj:
                    # Plain suffix test: the former pattern ".*-tn.jpg\Z" had
                    # an unescaped dot and so also matched ids such as
                    # "x-tnAjpg".
                    if not dsid.endswith('-tn.jpg'):
                        continue
                    try:
                        self.logger.info('Creating TN DS from %s .' % dsid)
                        comime.create_thumbnail(obj, dsid, 'TN')
                        break
                    except Exception:
                        # try the next candidate derivative
                        self.logger.exception('Could not create TN Datastream from %s .' % dsid)
        except Exception:
            self.logger.exception('Pid does not exist. Pid %s' % pid)
        self.logger.info('Derivative generation process complete for PID: %s' % pid)
        return

    # method == 'regenerateDerivatives'
    try:
        obj = client.getObject(pid)
        # Deletions are best effort: a datastream that is already missing
        # must not stop the regeneration.
        try:
            obj['TN'].delete()
        except Exception:
            pass
        relsint = rels_int(obj, rels_namespace('coal', 'http://www.coalliance.org/ontologies/relsint'), 'coal')
        # Read the relationships before RELS-INT itself is removed.
        relationships = relsint.getRelationships()
        try:
            obj['RELS-INT'].delete()
        except Exception:
            pass
        for relationship in relationships:
            # relationship[2].data is used as a datastream id - presumably
            # the derivative the relationship points at; confirm.
            try:
                obj[relationship[2].data].delete()
            except Exception:
                pass
        self.stomp.send('/topic/islandora', json.dumps(message), {'method' : 'generateDerivatives'})
    except Exception:
        self.logger.exception('Pid does not exist. Pid %s' % pid)
def islandoraMessage(self, method, message, client):
    """
    React to an Islandora message asking for derivatives to be (re)built.

    'generateDerivatives' runs self.processMessage over every datastream of
    the object. 'regenerateDerivatives' deletes the TN datastream, the
    RELS-INT datastream and every datastream named by a RELS-INT
    relationship, then re-queues the message as 'generateDerivatives'.
    Any other method is ignored.

    @param method The method name carried by the message.
    @param message Dict-like message body; must contain 'pid'.
    @param client Fedora client used to look the object up.
    """
    if method not in ('generateDerivatives', 'regenerateDerivatives'):
        return

    # Without a pid there is nothing to work on. Previously execution carried
    # on and the later message['pid'] lookups raised KeyError from inside the
    # error handler itself.
    if 'pid' not in message:
        self.logger.error("No PID passed in message.")
        return
    pid = message['pid']

    if method == 'generateDerivatives':
        try:
            obj = client.getObject(pid)
            comime = CoallianceMime(obj)
            for dsid in obj:
                self.processMessage(dsid, obj, comime)
        except Exception:
            self.logger.exception('Pid does not exist. Pid %s' % pid)
        self.logger.info(
            'Derivative generation process complete for PID: %s' % pid)
        return

    # method == 'regenerateDerivatives'
    try:
        obj = client.getObject(pid)
        # Deletions are best effort: a datastream that is already missing
        # must not stop the regeneration.
        try:
            obj['TN'].delete()
        except Exception:
            pass
        relsint = rels_int(
            obj,
            rels_namespace(
                'coal', 'http://www.coalliance.org/ontologies/relsint'),
            'coal')
        # Read the relationships before RELS-INT itself is removed.
        relationships = relsint.getRelationships()
        try:
            obj['RELS-INT'].delete()
        except Exception:
            pass
        for relationship in relationships:
            # relationship[2].data is used as a datastream id - presumably
            # the derivative the relationship points at; confirm.
            try:
                obj[relationship[2].data].delete()
            except Exception:
                pass
        self.stomp.send('/topic/islandora', json.dumps(message),
                        {'method': 'generateDerivatives'})
    except Exception:
        self.logger.exception('Pid does not exist. Pid %s' % pid)
def test_one_namespace_literal(self):
    # A literal added under a bare predicate name must be serialised in the
    # default ('coal') namespace.
    expected_xml = """ <rdf:RDF xmlns:coal="http://www.coalliance.org/ontologies/relsint" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#"> <rdf:Description rdf:about="info:fedora/coccc:2040"> <coal:HasAwesomeness>JON</coal:HasAwesomeness> </rdf:Description> </rdf:RDF> """
    coal_ns = rels_namespace(
        'coal', 'http://www.coalliance.org/ontologies/relsint')
    rel = fedora_relationship(coal_ns, 'coal')
    literal = rels_object('JON', rels_object.LITERAL)
    rel.addRelationship('coccc:2040', 'HasAwesomeness', literal)

    generated = XmlHelper.mangle(rel.toString())
    expected = XmlHelper.mangle(expected_xml)
    self.assertEqual(generated, expected, 'Generated XML Incorrect')
### SCAN FOR PAGE OBJECTS oldModel = "archiveorg:pageCModel" newModel = "islandora:pageCModel" pages = getPidsForContentModel(oldModel) print("Found %d page objects to update" % len(pages)) for pid in pages: strippedPid = pid.replace('info:fedora/', '') print(strippedPid) + " ...", try: obj = fedora.getObject(strippedPid) except FedoraConnectionException, fcx: print("Failed to connect to object %s" % pid) continue nsmap = [ fedora_relationships.rels_namespace('fedora', 'info:fedora/fedora-system:def/relations-external#'), fedora_relationships.rels_namespace('fedora-model', 'info:fedora/fedora-system:def/model#'), fedora_relationships.rels_namespace('pageNS', 'info:islandora/islandora-system:def/pageinfo#') ] rels_ext = fedora_relationships.rels_ext(obj, nsmap, 'fedora') # get the page number value = rels_ext.getRelationships(predicate=["pageNS", "isPageNumber"]) number = int(str(value[0][2])) # get the parent object value = rels_ext.getRelationships(predicate=["fedora", "isMemberOf"]) if value == []: value = rels_ext.getRelationships(predicate=["fedora", "isMemberOfCollection"]) if value: parent = str(value[0][2])
from islandoraUtils.metadata import fedora_relationships
from lxml import etree
from StringIO import StringIO

# Script entry point (the remainder of the script lies beyond this excerpt).
if __name__ == '__main__':
    # exactly one argument is expected: the source directory
    if len(sys.argv) == 2:
        source_directory = sys.argv[1]
    else:
        print('Please verify source directory.')
        sys.exit(-1)
    ''' setup '''
    macrepo_rdf_name_space = fedora_relationships.rels_namespace('macrepo', 'http://repository.mcmaster.ca/ontology#')
    fedora_model_namespace = fedora_relationships.rels_namespace('fedora-model','info:fedora/fedora-system:def/model#')
    #configure logging
    log_directory = os.path.join(source_directory,'logs')
    if not os.path.isdir(log_directory):
        os.mkdir(log_directory)
    # NOTE(review): the second component is an absolute path, so
    # os.path.join discards log_directory and the log file ends up under
    # /big2/dc/Digital-Collections/archival-objects/ instead of the logs
    # directory created above - confirm this is intended.
    logFile = os.path.join(log_directory,'/big2/dc/Digital-Collections/archival-objects/ITM' + time.strftime('%y_%m_%d') + '.log')
    logging.basicConfig(filename=logFile, level=logging.DEBUG)
    #get config
    # mcmaster.cfg is read from the source directory; it must provide a 'url'
    # option in both its 'Solr' and 'Fedora' sections
    config = ConfigParser.ConfigParser()
    config.read(os.path.join(source_directory,'mcmaster.cfg'))
    solrUrl = config.get('Solr','url')
    fedoraUrl = config.get('Fedora','url')
import logging, sys, os, ConfigParser, time, subprocess #, shutil from fcrepo.connection import Connection, FedoraConnectionException from fcrepo.client import FedoraClient from islandoraUtils.metadata import fedora_relationships from lxml import etree if __name__ == '__main__': if len(sys.argv) == 2: source_directory = sys.argv[1] else: print('Please verify source directory.') sys.exit(-1) ''' setup ''' hamilton_rdf_name_space = fedora_relationships.rels_namespace( 'hamilton', 'http://hamilton.org/ontology#') fedora_model_namespace = fedora_relationships.rels_namespace( 'fedora-model', 'info:fedora/fedora-system:def/model#') #configure logging log_directory = os.path.join(source_directory, 'logs') if not os.path.isdir(log_directory): os.mkdir(log_directory) logFile = os.path.join( log_directory, 'Hamilton_batch_letter_ingest' + time.strftime('%y_%m_%d') + '.log') logging.basicConfig(filename=logFile, level=logging.DEBUG) #get config config = ConfigParser.ConfigParser() config.read(os.path.join(source_directory, 'HAMILTON.cfg'))
def process(self):
    """
    Turn one spreadsheet row (self.element) into an EAC-CPF authority object
    and, when the referenced image file exists, a related photo object.

    Rows that do not yield a 'photo' value are skipped entirely. Nothing is
    returned.
    """
    #self.logger.debug('Received line: %s' % self.element)
    info = dict()
    # Pair the row's cells with their column names; zip stops at the shorter
    # of the two sequences.
    for part, value in zip(['forename', 'surname', 'birth_date', 'death_date', 'alt_forename', 'ceacs_member', 'academic_page', 'ceacs_arrival', 'ceacs_depart', 'phd_date', 'photo'], self.element):
        val = unicode(value.strip(), 'UTF-8', 'replace')
        # empty cells are dropped, except the two dates, which are kept even
        # when blank because add_exist_dates below reads both unconditionally
        if val or part in ['birth_date', 'death_date']:
            info[part] = val
    # rows without a photo are not processed at all
    if 'photo' not in info:
        return
    #self.logger.debug('info dictionary: %s' % info)
    info['full_name'] = "%(surname)s, %(forename)s" % info
    auth_record = FedoraWrapper.getNextObject(prefix=self.prefix, label=info['full_name'].encode('ascii', 'replace'))
    cpf = eaccpf(auth_record.pid)
    cpf.add_maintenance_event(agent="Adam Vessey, via ir_authority.py")
    cpf.add_bin_source(caption='Row in Excel spreadsheet', obj=str(self.element))
    cpf.add_name_entry(name={ 'forename': info['forename'], 'surname': info['surname'] })
    if 'alt_forename' in info:
        cpf.add_name_entry(name={ 'forename': info['alt_forename'], 'surname': info['surname'] }, role='alternative')
    cl = list()
    # rel starts as an empty list but is replaced by a dict below when any
    # CEACS date is present; only its truthiness is tested afterwards
    rel = list()
    if 'phd_date' in info:
        cl.append({ 'date': info['phd_date'], 'event': 'Achieved PhD' })
    # the membership date range carries whichever of arrival/departure exist
    if 'ceacs_arrival' in info and 'ceacs_depart' in info:
        rel = { 'dateRange': { 'fromDate': info['ceacs_arrival'], 'toDate': info['ceacs_depart'] }, 'event': 'CEACS membership' }
    elif 'ceacs_arrival' in info:
        rel = { 'dateRange': { 'fromDate': info['ceacs_arrival'] }, 'event': 'CEACS membership' }
    elif 'ceacs_depart' in info:
        rel = { 'dateRange': { 'toDate': info['ceacs_depart'] }, 'event': 'CEACS membership' }
    if cl:
        cpf.add_chron_list(cl)
    if rel:
        cpf.add_relation(type='cpfRelation', url='http://digital.march.es/ceacs', elements=rel)
    if 'academic_page' in info:
        cpf.add_relation(type="resourceRelation", url=info['academic_page'], elements={'descriptiveNote': 'Academic page'})
    cpf.add_exist_dates(info['birth_date'], info['death_date'])
    #print(cpf)
    Authority.save_etree(auth_record, cpf.element, 'EAC-CPF', 'EAC-CPF record',
                         controlGroup='X', hash='DISABLED')
    rels = FR.rels_ext(obj=auth_record, namespaces=Authority.NS.values())
    rels.addRelationship(['fedora-model', 'hasModel'], ['ir:authorityCModel', 'pid'])
    rels.update()
    #Add image (with relationship to object?).
    # NOTE(review): because of the early return above, 'photo' is always in
    # info at this point, so the else branch at the bottom is unreachable.
    if 'photo' in info:
        photo_path = self.getPath(info['photo'])
        if path.exists(photo_path):
            #Create the object...
            photo = FedoraWrapper.getNextObject(self.prefix, label=('Photo of %s' % info['full_name']).encode('ascii', 'replace'))
            #... add the datastream ...
            update_datastream(photo, 'JPG', filename=photo_path, label='Original image', checksumType='SHA-1', mimeType='image/jpeg')
            #... and relate the object.
            # NOTE(review): NSs is the same object as Authority.NS, so adding
            # 'ir-rel' here changes the shared mapping for every later use of
            # Authority.NS - confirm that is intended.
            NSs = Authority.NS
            NSs['ir-rel'] = FR.rels_namespace('ir-rel', 'http://digital.march.es/ceacs#')
            p_rels = FR.rels_ext(photo, namespaces=NSs.values())
            p_rels.addRelationship(['fedora-model', 'hasModel'], ['ir:photoCModel', 'pid'])
            p_rels.addRelationship(['ir-rel', 'iconOf'], [auth_record.pid, 'pid'])
            p_rels.update()
        else:
            self.logger.warning('photo: %s specified, but %s does not exist!' % (info['photo'], photo_path))
    else:
        self.logger.debug('No photo specified.')
from fcrepo.client import FedoraClient
from islandoraUtils.metadata import fedora_relationships
from lxml import etree

# Script entry point (the remainder of the script lies beyond this excerpt).
if __name__ == '__main__':
    # exactly one argument is expected: the source directory
    if len(sys.argv) == 2:
        source_directory = sys.argv[1]
    else:
        print('Please verify source directory.')
        sys.exit(-1)
    ''' setup '''
    hamilton_rdf_name_space = fedora_relationships.rels_namespace('hamilton', 'http://hamilton.org/ontology#')
    fedora_model_namespace = fedora_relationships.rels_namespace('fedora-model','info:fedora/fedora-system:def/model#')
    #configure logging
    # one log file per day, written to <source_directory>/logs (created on
    # first use)
    log_directory = os.path.join(source_directory,'logs')
    if not os.path.isdir(log_directory):
        os.mkdir(log_directory)
    logFile = os.path.join(log_directory,'Hamilton_batch_letter_ingest' + time.strftime('%y_%m_%d') + '.log')
    logging.basicConfig(filename=logFile, level=logging.DEBUG)
    #get config
    # HAMILTON.cfg is read from the source directory; the TEST.cfg
    # alternative is kept below, disabled
    config = ConfigParser.ConfigParser()
    config.read(os.path.join(source_directory,'HAMILTON.cfg'))
    #config.read(os.path.join(source_directory,'TEST.cfg'))
    solrUrl = config.get('Solr','url')